diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-06-16 11:41:13 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-06-16 11:41:13 +1000 |
commit | b8ea67b73d8a5d9c541217099e11d508d6e486cc (patch) | |
tree | 6ce092ed8dcb2c3655fc7672bb1941ca0906a07d | |
parent | a4c772c8e13209f6996e538beb7c0d2b1df36cd8 (diff) | |
parent | a26868912affb4819b9dd64d0d2e9c6b01b2466c (diff) |
Merge remote branch 'block/for-next'
-rw-r--r-- | block/blk-core.c | 3 | ||||
-rw-r--r-- | block/blk-sysfs.c | 82 | ||||
-rw-r--r-- | drivers/block/cciss.c | 553 | ||||
-rw-r--r-- | drivers/block/cciss.h | 115 | ||||
-rw-r--r-- | drivers/block/cciss_cmd.h | 32 | ||||
-rw-r--r-- | drivers/block/cciss_scsi.c | 5 | ||||
-rw-r--r-- | drivers/block/cpqarray.c | 6 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 6 | ||||
-rw-r--r-- | drivers/block/floppy.c | 144 | ||||
-rw-r--r-- | fs/afs/write.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 2 | ||||
-rw-r--r-- | fs/fs-writeback.c | 294 | ||||
-rw-r--r-- | fs/ubifs/budget.c | 2 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 4 | ||||
-rw-r--r-- | include/linux/blkdev.h | 5 | ||||
-rw-r--r-- | include/linux/drbd.h | 2 | ||||
-rw-r--r-- | include/linux/writeback.h | 7 | ||||
-rw-r--r-- | mm/backing-dev.c | 3 | ||||
-rw-r--r-- | mm/page-writeback.c | 8 |
20 files changed, 756 insertions, 520 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index f84cce42fc58..5cb6ca99fd7f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2114,7 +2114,8 @@ static bool blk_update_bidi_request(struct request *rq, int error, blk_update_request(rq->next_rq, error, bidi_bytes)) return true; - add_disk_randomness(rq->rq_disk); + if (blk_queue_add_random(rq->q)) + add_disk_randomness(rq->rq_disk); return false; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 306759bbdf1b..001ab18078f5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -180,26 +180,36 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_hw_sectors_kb, (page)); } -static ssize_t queue_nonrot_show(struct request_queue *q, char *page) -{ - return queue_var_show(!blk_queue_nonrot(q), page); +#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ +static ssize_t \ +queue_show_##name(struct request_queue *q, char *page) \ +{ \ + int bit; \ + bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \ + return queue_var_show(neg ? !bit : bit, page); \ +} \ +static ssize_t \ +queue_store_##name(struct request_queue *q, const char *page, size_t count) \ +{ \ + unsigned long val; \ + ssize_t ret; \ + ret = queue_var_store(&val, page, count); \ + if (neg) \ + val = !val; \ + \ + spin_lock_irq(q->queue_lock); \ + if (val) \ + queue_flag_set(QUEUE_FLAG_##flag, q); \ + else \ + queue_flag_clear(QUEUE_FLAG_##flag, q); \ + spin_unlock_irq(q->queue_lock); \ + return ret; \ } -static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, - size_t count) -{ - unsigned long nm; - ssize_t ret = queue_var_store(&nm, page, count); - - spin_lock_irq(q->queue_lock); - if (nm) - queue_flag_clear(QUEUE_FLAG_NONROT, q); - else - queue_flag_set(QUEUE_FLAG_NONROT, q); - spin_unlock_irq(q->queue_lock); - - return ret; -} +QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); +QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); +QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); +#undef QUEUE_SYSFS_BIT_FNS static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { @@ -250,27 +260,6 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } -static ssize_t queue_iostats_show(struct request_queue *q, char *page) -{ - return queue_var_show(blk_queue_io_stat(q), page); -} - -static ssize_t queue_iostats_store(struct request_queue *q, const char *page, - size_t count) -{ - unsigned long stats; - ssize_t ret = queue_var_store(&stats, page, count); - - spin_lock_irq(q->queue_lock); - if (stats) - queue_flag_set(QUEUE_FLAG_IO_STAT, q); - else - queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - spin_unlock_irq(q->queue_lock); - - return ret; -} - static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -352,8 +341,8 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, - .show = queue_nonrot_show, - .store = queue_nonrot_store, + .show = queue_show_nonrot, + .store = queue_store_nonrot, }; static struct queue_sysfs_entry queue_nomerges_entry = { @@ -370,8 +359,14 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = { static struct queue_sysfs_entry queue_iostats_entry = { .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, - .show = queue_iostats_show, - .store = queue_iostats_store, + .show = queue_show_iostats, + .store = queue_store_iostats, +}; + +static struct queue_sysfs_entry queue_random_entry = { + .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR }, + .show = queue_show_random, + .store = queue_store_random, }; static struct attribute *default_attrs[] = { @@ -394,6 +389,7 @@ static struct attribute *default_attrs[] = { &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, + &queue_random_entry.attr, NULL, }; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 51ceaee98f9f..10a0268a1f92 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -56,16 +56,14 @@ #include <linux/kthread.h> #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin)) -#define DRIVER_NAME "HP CISS Driver (v 3.6.20)" -#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20) +#define DRIVER_NAME "HP CISS Driver (v 3.6.26)" +#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 26) /* Embedded module documentation macros - see modules.h */ MODULE_AUTHOR("Hewlett-Packard Company"); MODULE_DESCRIPTION("Driver for HP Smart Array Controllers"); -MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400" - " SA6i P600 P800 P400 P400i E200 E200i E500 P700m" - " Smart Array G2 Series SAS/SATA Controllers"); -MODULE_VERSION("3.6.20"); +MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers"); +MODULE_VERSION("3.6.26"); MODULE_LICENSE("GPL"); static int cciss_allow_hpsa; @@ -107,6 +105,11 @@ static const struct pci_device_id cciss_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3250}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3251}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3252}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3253}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3254}, {0,} }; @@ -146,6 +149,11 @@ static struct board_type products[] = { {0x3249103C, "Smart Array P812", &SA5_access}, {0x324A103C, "Smart Array P712m", &SA5_access}, {0x324B103C, "Smart Array P711m", &SA5_access}, + {0x3250103C, "Smart Array", &SA5_access}, + {0x3251103C, "Smart Array", &SA5_access}, + {0x3252103C, "Smart Array", &SA5_access}, + {0x3253103C, "Smart Array", &SA5_access}, + {0x3254103C, "Smart Array", &SA5_access}, }; /* How long to wait (in milliseconds) for board to go into simple mode */ @@ -167,7 +175,8 @@ static DEFINE_MUTEX(scan_mutex); static LIST_HEAD(scan_q); static void do_cciss_request(struct request_queue *q); -static irqreturn_t do_cciss_intr(int irq, void *dev_id); +static irqreturn_t do_cciss_intx(int irq, void *dev_id); +static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id); static int cciss_open(struct block_device *bdev, fmode_t mode); static int cciss_release(struct gendisk *disk, fmode_t mode); static int cciss_ioctl(struct block_device *bdev, fmode_t mode, @@ -197,7 +206,6 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c, int attempt_retry); static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c); -static void fail_all_cmds(unsigned long ctlr); static int add_to_scan_list(struct ctlr_info *h); static int scan_thread(void *data); static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c); @@ -205,6 +213,12 @@ static void cciss_hba_release(struct device *dev); static void cciss_device_release(struct device *dev); static void cciss_free_gendisk(ctlr_info_t *h, int drv_index); static void cciss_free_drive_info(ctlr_info_t *h, int drv_index); +static inline u32 next_command(ctlr_info_t *h); + +/* performant mode helper functions */ +static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, + int *bucket_map); +static void cciss_put_controller_into_performant_mode(ctlr_info_t *h); #ifdef CONFIG_PROC_FS static void cciss_procinit(int i); @@ -231,6 +245,16 @@ static const struct block_device_operations cciss_fops = { .revalidate_disk = cciss_revalidate, }; +/* set_performant_mode: Modify the tag for cciss performant + * set bit 0 for pull model, bits 3-1 for block fetch + * register number + */ +static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c) +{ + if (likely(h->transMethod == CFGTBL_Trans_Performant)) + c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); +} + /* * Enqueuing and dequeuing functions for cmdlists. */ @@ -257,6 +281,18 @@ static inline void removeQ(CommandList_struct *c) hlist_del_init(&c->list); } +static void enqueue_cmd_and_start_io(ctlr_info_t *h, + CommandList_struct *c) +{ + unsigned long flags; + set_performant_mode(h, c); + spin_lock_irqsave(&h->lock, flags); + addQ(&h->reqQ, c); + h->Qdepth++; + start_io(h); + spin_unlock_irqrestore(&h->lock, flags); +} + static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list, int nr_cmds) { @@ -366,7 +402,7 @@ static void cciss_seq_show_header(struct seq_file *seq) h->product_name, (unsigned long)h->board_id, h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], - h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT], + h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT], h->num_luns, h->Qdepth, h->commands_outstanding, h->maxQsinceinit, h->max_outstanding, h->maxSG); @@ -1377,7 +1413,6 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, CommandList_struct *c; char *buff = NULL; u64bit temp64; - unsigned long flags; DECLARE_COMPLETION_ONSTACK(wait); if (!arg) @@ -1449,13 +1484,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, } c->waiting = &wait; - /* Put the request on the tail of the request queue */ - spin_lock_irqsave(CCISS_LOCK(ctlr), flags); - addQ(&host->reqQ, c); - host->Qdepth++; - start_io(host); - spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - + enqueue_cmd_and_start_io(host, c); wait_for_completion(&wait); /* unlock the buffers from DMA */ @@ -1495,7 +1524,6 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, unsigned char **buff = NULL; int *buff_size = NULL; u64bit temp64; - unsigned long flags; BYTE sg_used = 0; int status = 0; int i; @@ -1602,12 +1630,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, } } c->waiting = &wait; - /* Put the request on the tail of the request queue */ - spin_lock_irqsave(CCISS_LOCK(ctlr), flags); - addQ(&host->reqQ, c); - host->Qdepth++; - start_io(host); - spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); + enqueue_cmd_and_start_io(host, c); wait_for_completion(&wait); /* unlock the buffers from DMA */ for (i = 0; i < sg_used; i++) { @@ -1729,8 +1752,8 @@ static void cciss_softirq_done(struct request *rq) CommandList_struct *cmd = rq->completion_data; ctlr_info_t *h = hba[cmd->ctlr]; SGDescriptor_struct *curr_sg = cmd->SG; - unsigned long flags; u64bit temp64; + unsigned long flags; int i, ddir; int sg_index = 0; @@ -2679,17 +2702,11 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c, { DECLARE_COMPLETION_ONSTACK(wait); u64bit buff_dma_handle; - unsigned long flags; int return_status = IO_OK; resend_cmd2: c->waiting = &wait; - /* Put the request on the tail of the queue and send it */ - spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - addQ(&h->reqQ, c); - h->Qdepth++; - start_io(h); - spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + enqueue_cmd_and_start_io(h, c); wait_for_completion(&wait); @@ -3132,6 +3149,34 @@ after_error_processing: blk_complete_request(cmd->rq); } +static inline u32 cciss_tag_contains_index(u32 tag) +{ +#define DIRECT_LOOKUP_BIT 0x10 + return tag & DIRECT_LOOKUP_BIT; +} + +static inline u32 cciss_tag_to_index(u32 tag) +{ +#define DIRECT_LOOKUP_SHIFT 5 + return tag >> DIRECT_LOOKUP_SHIFT; +} + +static inline u32 cciss_tag_discard_error_bits(u32 tag) +{ +#define CCISS_ERROR_BITS 0x03 + return tag & ~CCISS_ERROR_BITS; +} + +static inline void cciss_mark_tag_indexed(u32 *tag) +{ + *tag |= DIRECT_LOOKUP_BIT; +} + +static inline void cciss_set_tag_index(u32 *tag, u32 index) +{ + *tag |= (index << DIRECT_LOOKUP_SHIFT); +} + /* * Get a request and submit it to the controller. */ @@ -3180,8 +3225,8 @@ static void do_cciss_request(struct request_queue *q) /* got command from pool, so use the command block index instead */ /* for direct lookups. */ /* The first 2 bits are reserved for controller error reporting. */ - c->Header.Tag.lower = (c->cmdindex << 3); - c->Header.Tag.lower |= 0x04; /* flag for direct lookup. */ + cciss_set_tag_index(&c->Header.Tag.lower, c->cmdindex); + cciss_mark_tag_indexed(&c->Header.Tag.lower); memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID)); c->Request.CDBLen = 10; /* 12 byte commands not in FW yet; */ c->Request.Type.Type = TYPE_CMD; /* It is a command. */ @@ -3242,9 +3287,12 @@ static void do_cciss_request(struct request_queue *q) blk_rq_sectors(creq), seg, chained); #endif /* CCISS_DEBUG */ - c->Header.SGList = c->Header.SGTotal = seg + chained; - if (seg > h->max_cmd_sgentries) + c->Header.SGTotal = seg + chained; + if (seg <= h->max_cmd_sgentries) + c->Header.SGList = c->Header.SGTotal; + else c->Header.SGList = h->max_cmd_sgentries; + set_performant_mode(h, c); if (likely(blk_fs_request(creq))) { if(h->cciss_read == CCISS_READ_10) { @@ -3313,16 +3361,97 @@ static inline int interrupt_pending(ctlr_info_t *h) static inline long interrupt_not_for_us(ctlr_info_t *h) { - return (((h->access.intr_pending(h) == 0) || - (h->interrupts_enabled == 0))); + return !(h->msi_vector || h->msix_vector) && + ((h->access.intr_pending(h) == 0) || + (h->interrupts_enabled == 0)); } -static irqreturn_t do_cciss_intr(int irq, void *dev_id) +static inline int bad_tag(ctlr_info_t *h, u32 tag_index, + u32 raw_tag) { - ctlr_info_t *h = dev_id; + if (unlikely(tag_index >= h->nr_cmds)) { + dev_warn(&h->pdev->dev, "bad tag 0x%08x ignored.\n", raw_tag); + return 1; + } + return 0; +} + +static inline void finish_cmd(ctlr_info_t *h, CommandList_struct *c, + u32 raw_tag) +{ + removeQ(c); + if (likely(c->cmd_type == CMD_RWREQ)) + complete_command(h, c, 0); + else if (c->cmd_type == CMD_IOCTL_PEND) + complete(c->waiting); +#ifdef CONFIG_CISS_SCSI_TAPE + else if (c->cmd_type == CMD_SCSI) + complete_scsi_command(c, 0, raw_tag); +#endif +} + +static inline u32 next_command(ctlr_info_t *h) +{ + u32 a; + + if (unlikely(h->transMethod != CFGTBL_Trans_Performant)) + return h->access.command_completed(h); + + if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) { + a = *(h->reply_pool_head); /* Next cmd in ring buffer */ + (h->reply_pool_head)++; + h->commands_outstanding--; + } else { + a = FIFO_EMPTY; + } + /* Check for wraparound */ + if (h->reply_pool_head == (h->reply_pool + h->max_commands)) { + h->reply_pool_head = h->reply_pool; + h->reply_pool_wraparound ^= 1; + } + return a; +} + +/* process completion of an indexed ("direct lookup") command */ +static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag) +{ + u32 tag_index; CommandList_struct *c; + + tag_index = cciss_tag_to_index(raw_tag); + if (bad_tag(h, tag_index, raw_tag)) + return next_command(h); + c = h->cmd_pool + tag_index; + finish_cmd(h, c, raw_tag); + return next_command(h); +} + +/* process completion of a non-indexed command */ +static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) +{ + u32 tag; + CommandList_struct *c = NULL; + struct hlist_node *tmp; + __u32 busaddr_masked, tag_masked; + + tag = cciss_tag_discard_error_bits(raw_tag); + hlist_for_each_entry(c, tmp, &h->cmpQ, list) { + busaddr_masked = cciss_tag_discard_error_bits(c->busaddr); + tag_masked = cciss_tag_discard_error_bits(tag); + if (busaddr_masked == tag_masked) { + finish_cmd(h, c, raw_tag); + return next_command(h); + } + } + bad_tag(h, h->nr_cmds + 1, raw_tag); + return next_command(h); +} + +static irqreturn_t do_cciss_intx(int irq, void *dev_id) +{ + ctlr_info_t *h = dev_id; unsigned long flags; - __u32 a, a1, a2; + u32 raw_tag; if (interrupt_not_for_us(h)) return IRQ_NONE; @@ -3332,50 +3461,41 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) */ spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while (interrupt_pending(h)) { - while ((a = get_next_completion(h)) != FIFO_EMPTY) { - a1 = a; - if ((a & 0x04)) { - a2 = (a >> 3); - if (a2 >= h->nr_cmds) { - printk(KERN_WARNING - "cciss: controller cciss%d failed, stopping.\n", - h->ctlr); - spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - fail_all_cmds(h->ctlr); - return IRQ_HANDLED; - } + raw_tag = get_next_completion(h); + while (raw_tag != FIFO_EMPTY) { + if (cciss_tag_contains_index(raw_tag)) + raw_tag = process_indexed_cmd(h, raw_tag); + else + raw_tag = process_nonindexed_cmd(h, raw_tag); + } + } - c = h->cmd_pool + a2; - a = c->busaddr; + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return IRQ_HANDLED; +} - } else { - struct hlist_node *tmp; +/* Add a second interrupt handler for MSI/MSI-X mode. In this mode we never + * check the interrupt pending register because it is not set. + */ +static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id) +{ + ctlr_info_t *h = dev_id; + unsigned long flags; + u32 raw_tag; - a &= ~3; - c = NULL; - hlist_for_each_entry(c, tmp, &h->cmpQ, list) { - if (c->busaddr == a) - break; - } - } - /* - * If we've found the command, take it off the - * completion Q and free it - */ - if (c && c->busaddr == a) { - removeQ(c); - if (c->cmd_type == CMD_RWREQ) { - complete_command(h, c, 0); - } else if (c->cmd_type == CMD_IOCTL_PEND) { - complete(c->waiting); - } -# ifdef CONFIG_CISS_SCSI_TAPE - else if (c->cmd_type == CMD_SCSI) - complete_scsi_command(c, 0, a1); -# endif - continue; - } - } + if (interrupt_not_for_us(h)) + return IRQ_NONE; + /* + * If there are completed commands in the completion queue, + * we had better do something about it. + */ + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + raw_tag = get_next_completion(h); + while (raw_tag != FIFO_EMPTY) { + if (cciss_tag_contains_index(raw_tag)) + raw_tag = process_indexed_cmd(h, raw_tag); + else + raw_tag = process_nonindexed_cmd(h, raw_tag); } spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); @@ -3630,6 +3750,155 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr) return -1; } +/* Fill in bucket_map[], given nsgs (the max number of + * scatter gather elements supported) and bucket[], + * which is an array of 8 integers. The bucket[] array + * contains 8 different DMA transfer sizes (in 16 + * byte increments) which the controller uses to fetch + * commands. This function fills in bucket_map[], which + * maps a given number of scatter gather elements to one of + * the 8 DMA transfer sizes. The point of it is to allow the + * controller to only do as much DMA as needed to fetch the + * command, with the DMA transfer size encoded in the lower + * bits of the command address. + */ +static void calc_bucket_map(int bucket[], int num_buckets, + int nsgs, int *bucket_map) +{ + int i, j, b, size; + + /* even a command with 0 SGs requires 4 blocks */ +#define MINIMUM_TRANSFER_BLOCKS 4 +#define NUM_BUCKETS 8 + /* Note, bucket_map must have nsgs+1 entries. */ + for (i = 0; i <= nsgs; i++) { + /* Compute size of a command with i SG entries */ + size = i + MINIMUM_TRANSFER_BLOCKS; + b = num_buckets; /* Assume the biggest bucket */ + /* Find the bucket that is just big enough */ + for (j = 0; j < 8; j++) { + if (bucket[j] >= size) { + b = j; + break; + } + } + /* for a command with i SG entries, use bucket b. */ + bucket_map[i] = b; + } +} + +static void +cciss_put_controller_into_performant_mode(ctlr_info_t *h) +{ + int l = 0; + __u32 trans_support; + __u32 trans_offset; + /* + * 5 = 1 s/g entry or 4k + * 6 = 2 s/g entry or 8k + * 8 = 4 s/g entry or 16k + * 10 = 6 s/g entry or 24k + */ + int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4}; + unsigned long register_value; + + BUILD_BUG_ON(28 > MAXSGENTRIES + 4); + + /* Attempt to put controller into performant mode if supported */ + /* Does board support performant mode? */ + trans_support = readl(&(h->cfgtable->TransportSupport)); + if (!(trans_support & PERFORMANT_MODE)) + return; + + printk(KERN_WARNING "cciss%d: Placing controller into " + "performant mode\n", h->ctlr); + /* Performant mode demands commands on a 32 byte boundary + * pci_alloc_consistent aligns on page boundarys already. + * Just need to check if divisible by 32 + */ + if ((sizeof(CommandList_struct) % 32) != 0) { + printk(KERN_WARNING "%s %d %s\n", + "cciss info: command size[", + (int)sizeof(CommandList_struct), + "] not divisible by 32, no performant mode..\n"); + return; + } + + /* Performant mode ring buffer and supporting data structures */ + h->reply_pool = (__u64 *)pci_alloc_consistent( + h->pdev, h->max_commands * sizeof(__u64), + &(h->reply_pool_dhandle)); + + /* Need a block fetch table for performant mode */ + h->blockFetchTable = kmalloc(((h->maxsgentries+1) * + sizeof(__u32)), GFP_KERNEL); + + if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL)) + goto clean_up; + + h->reply_pool_wraparound = 1; /* spec: init to 1 */ + + /* Controller spec: zero out this buffer. */ + memset(h->reply_pool, 0, h->max_commands * sizeof(__u64)); + h->reply_pool_head = h->reply_pool; + + trans_offset = readl(&(h->cfgtable->TransMethodOffset)); + calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries, + h->blockFetchTable); + writel(bft[0], &h->transtable->BlockFetch0); + writel(bft[1], &h->transtable->BlockFetch1); + writel(bft[2], &h->transtable->BlockFetch2); + writel(bft[3], &h->transtable->BlockFetch3); + writel(bft[4], &h->transtable->BlockFetch4); + writel(bft[5], &h->transtable->BlockFetch5); + writel(bft[6], &h->transtable->BlockFetch6); + writel(bft[7], &h->transtable->BlockFetch7); + + /* size of controller ring buffer */ + writel(h->max_commands, &h->transtable->RepQSize); + writel(1, &h->transtable->RepQCount); + writel(0, &h->transtable->RepQCtrAddrLow32); + writel(0, &h->transtable->RepQCtrAddrHigh32); + writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32); + writel(0, &h->transtable->RepQAddr0High32); + writel(CFGTBL_Trans_Performant, + &(h->cfgtable->HostWrite.TransportRequest)); + + h->transMethod = CFGTBL_Trans_Performant; + writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); + /* under certain very rare conditions, this can take awhile. + * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right + * as we enter this code.) */ + for (l = 0; l < MAX_CONFIG_WAIT; l++) { + register_value = readl(h->vaddr + SA5_DOORBELL); + if (!(register_value & CFGTBL_ChangeReq)) + break; + /* delay and try again */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(10); + } + register_value = readl(&(h->cfgtable->TransportActive)); + if (!(register_value & CFGTBL_Trans_Performant)) { + printk(KERN_WARNING "cciss: unable to get board into" + " performant mode\n"); + return; + } + + /* Change the access methods to the performant access methods */ + h->access = SA5_performant_access; + + return; +clean_up: + kfree(h->blockFetchTable); + if (h->reply_pool) + pci_free_consistent(h->pdev, + h->max_commands * sizeof(__u64), + h->reply_pool, + h->reply_pool_dhandle); + return; + +} /* cciss_put_controller_into_performant_mode */ + /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use IO-APIC mode. */ @@ -3679,7 +3948,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c, default_int_mode: #endif /* CONFIG_PCI_MSI */ /* if we get here we're going to use the default interrupt mode */ - c->intr[SIMPLE_MODE_INT] = pdev->irq; + c->intr[PERF_MODE_INT] = pdev->irq; return; } @@ -3691,6 +3960,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) __u32 cfg_base_addr; __u64 cfg_base_addr_index; int i, prod_index, err; + __u32 trans_offset; subsystem_vendor_id = pdev->subsystem_vendor; subsystem_device_id = pdev->subsystem_device; @@ -3804,11 +4074,16 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) c->cfgtable = remap_pci_mem(pci_resource_start(pdev, cfg_base_addr_index) + cfg_offset, sizeof(CfgTable_struct)); + /* Find performant mode table. */ + trans_offset = readl(&(c->cfgtable->TransMethodOffset)); + c->transtable = remap_pci_mem(pci_resource_start(pdev, + cfg_base_addr_index) + cfg_offset+trans_offset, + sizeof(*c->transtable)); c->board_id = board_id; -#ifdef CCISS_DEBUG - print_cfg_table(c->cfgtable); -#endif /* CCISS_DEBUG */ + #ifdef CCISS_DEBUG + print_cfg_table(c->cfgtable); + #endif /* CCISS_DEBUG */ /* Some controllers support Zero Memory Raid (ZMR). * When configured in ZMR mode the number of supported @@ -3818,7 +4093,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) * are supported on the controller then subtract 4 to * leave a little room for ioctl calls. */ - c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); + c->max_commands = readl(&(c->cfgtable->MaxPerformantModeCommands)); c->maxsgentries = readl(&(c->cfgtable->MaxSGElements)); /* @@ -3863,7 +4138,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) * kernels revealed a bug in the refetch if dom0 resides on a P600. */ if(board_id == 0x3225103C) { - __u32 dma_prefetch; + __u32 dma_prefetch; __u32 dma_refetch; dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG); dma_prefetch |= 0x8000; @@ -3874,38 +4149,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) } #ifdef CCISS_DEBUG - printk("Trying to put board into Simple mode\n"); -#endif /* CCISS_DEBUG */ - c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); - /* Update the field, and then ring the doorbell */ - writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest)); - writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL); - - /* under certain very rare conditions, this can take awhile. - * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right - * as we enter this code.) */ - for (i = 0; i < MAX_CONFIG_WAIT; i++) { - if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq)) - break; - /* delay and try again */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(1)); - } - -#ifdef CCISS_DEBUG - printk(KERN_DEBUG "I counter got to %d %x\n", i, - readl(c->vaddr + SA5_DOORBELL)); -#endif /* CCISS_DEBUG */ -#ifdef CCISS_DEBUG - print_cfg_table(c->cfgtable); + printk(KERN_WARNING "Trying to put board into Performant mode\n"); #endif /* CCISS_DEBUG */ - - if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) { - printk(KERN_WARNING "cciss: unable to get board into" - " simple mode\n"); - err = -ENODEV; - goto err_out_free_res; - } + cciss_put_controller_into_performant_mode(c); return 0; err_out_free_res: @@ -4190,7 +4436,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, i = alloc_cciss_hba(); if (i < 0) return -1; - hba[i]->busy_initializing = 1; INIT_HLIST_HEAD(&hba[i]->cmpQ); INIT_HLIST_HEAD(&hba[i]->reqQ); @@ -4238,16 +4483,26 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* make sure the board interrupts are off */ hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF); - if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr, - IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) { - printk(KERN_ERR "cciss: Unable to get irq %d for %s\n", - hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname); - goto clean2; + if (hba[i]->msi_vector || hba[i]->msix_vector) { + if (request_irq(hba[i]->intr[PERF_MODE_INT], + do_cciss_msix_intr, + IRQF_DISABLED, hba[i]->devname, hba[i])) { + printk(KERN_ERR "cciss: Unable to get irq %d for %s\n", + hba[i]->intr[PERF_MODE_INT], hba[i]->devname); + goto clean2; + } + } else { + if (request_irq(hba[i]->intr[PERF_MODE_INT], do_cciss_intx, + IRQF_DISABLED, hba[i]->devname, hba[i])) { + printk(KERN_ERR "cciss: Unable to get irq %d for %s\n", + hba[i]->intr[PERF_MODE_INT], hba[i]->devname); + goto clean2; + } } printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n", hba[i]->devname, pdev->device, pci_name(pdev), - hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); + hba[i]->intr[PERF_MODE_INT], dac ? "" : " not"); hba[i]->cmd_pool_bits = kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) @@ -4353,7 +4608,7 @@ clean4: hba[i]->nr_cmds * sizeof(ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); - free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]); + free_irq(hba[i]->intr[PERF_MODE_INT], hba[i]); clean2: unregister_blkdev(hba[i]->major, hba[i]->devname); clean1: @@ -4395,7 +4650,7 @@ static void cciss_shutdown(struct pci_dev *pdev) printk(KERN_WARNING "cciss%d: Error flushing cache\n", h->ctlr); h->access.set_intr_mask(h, CCISS_INTR_OFF); - free_irq(h->intr[2], h); + free_irq(h->intr[PERF_MODE_INT], h); } static void __devexit cciss_remove_one(struct pci_dev *pdev) @@ -4495,7 +4750,6 @@ static int __init cciss_init(void) * array of them, the size must be a multiple of 8 bytes. */ BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT); - printk(KERN_INFO DRIVER_NAME "\n"); err = bus_register(&cciss_bus_type); @@ -4542,46 +4796,5 @@ static void __exit cciss_cleanup(void) bus_unregister(&cciss_bus_type); } -static void fail_all_cmds(unsigned long ctlr) -{ - /* If we get here, the board is apparently dead. */ - ctlr_info_t *h = hba[ctlr]; - CommandList_struct *c; - unsigned long flags; - - printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr); - h->alive = 0; /* the controller apparently died... */ - - spin_lock_irqsave(CCISS_LOCK(ctlr), flags); - - pci_disable_device(h->pdev); /* Make sure it is really dead. */ - - /* move everything off the request queue onto the completed queue */ - while (!hlist_empty(&h->reqQ)) { - c = hlist_entry(h->reqQ.first, CommandList_struct, list); - removeQ(c); - h->Qdepth--; - addQ(&h->cmpQ, c); - } - - /* Now, fail everything on the completed queue with a HW error */ - while (!hlist_empty(&h->cmpQ)) { - c = hlist_entry(h->cmpQ.first, CommandList_struct, list); - removeQ(c); - if (c->cmd_type != CMD_MSG_STALE) - c->err_info->CommandStatus = CMD_HARDWARE_ERR; - if (c->cmd_type == CMD_RWREQ) { - complete_command(h, c, 0); - } else if (c->cmd_type == CMD_IOCTL_PEND) - complete(c->waiting); -#ifdef CONFIG_CISS_SCSI_TAPE - else if (c->cmd_type == CMD_SCSI) - complete_scsi_command(c, 0, 0); -#endif - } - spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - return; -} - module_init(cciss_init); module_exit(cciss_cleanup); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index c5d411174db0..8a9f5b58daa8 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -25,7 +25,7 @@ struct access_method { void (*submit_command)(ctlr_info_t *h, CommandList_struct *c); void (*set_intr_mask)(ctlr_info_t *h, unsigned long val); unsigned long (*fifo_full)(ctlr_info_t *h); - unsigned long (*intr_pending)(ctlr_info_t *h); + bool (*intr_pending)(ctlr_info_t *h); unsigned long (*command_completed)(ctlr_info_t *h); }; typedef struct _drive_info_struct @@ -85,8 +85,8 @@ struct ctlr_info int max_cmd_sgentries; SGDescriptor_struct **cmd_sg_list; -# define DOORBELL_INT 0 -# define PERF_MODE_INT 1 +# define PERF_MODE_INT 0 +# define DOORBELL_INT 1 # define SIMPLE_MODE_INT 2 # define MEMQ_MODE_INT 3 unsigned int intr[4]; @@ -137,10 +137,27 @@ struct ctlr_info struct list_head scan_list; struct completion scan_wait; struct device dev; + /* + * Performant mode tables. + */ + u32 trans_support; + u32 trans_offset; + struct TransTable_struct *transtable; + unsigned long transMethod; + + /* + * Performant mode completion buffer + */ + u64 *reply_pool; + dma_addr_t reply_pool_dhandle; + u64 *reply_pool_head; + size_t reply_pool_size; + unsigned char reply_pool_wraparound; + u32 *blockFetchTable; }; -/* Defining the diffent access_menthods */ -/* +/* Defining the diffent access_methods + * * Memory mapped FIFO interface (SMART 53xx cards) */ #define SA5_DOORBELL 0x20 @@ -159,6 +176,15 @@ struct ctlr_info #define SA5B_INTR_PENDING 0x04 #define FIFO_EMPTY 0xffffffff #define CCISS_FIRMWARE_READY 0xffff0000 /* value in scratchpad register */ +/* Perf. mode flags */ +#define SA5_PERF_INTR_PENDING 0x04 +#define SA5_PERF_INTR_OFF 0x05 +#define SA5_OUTDB_STATUS_PERF_BIT 0x01 +#define SA5_OUTDB_CLEAR_PERF_BIT 0x01 +#define SA5_OUTDB_CLEAR 0xA0 +#define SA5_OUTDB_CLEAR_PERF_BIT 0x01 +#define SA5_OUTDB_STATUS 0x9C + #define CISS_ERROR_BIT 0x02 @@ -170,8 +196,9 @@ struct ctlr_info static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c) { #ifdef CCISS_DEBUG - printk("Sending %x - down to controller\n", c->busaddr ); -#endif /* CCISS_DEBUG */ + printk(KERN_WARNING "cciss%d: Sending %08x - down to controller\n", + h->ctlr, c->busaddr); +#endif /* CCISS_DEBUG */ writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET); h->commands_outstanding++; if ( h->commands_outstanding > h->max_outstanding) @@ -214,6 +241,20 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val) h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } } + +/* Performant mode intr_mask */ +static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val) +{ + if (val) { /* turn on interrupts */ + h->interrupts_enabled = 1; + writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + } else { + h->interrupts_enabled = 0; + writel(SA5_PERF_INTR_OFF, + h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + } +} + /* * Returns true if fifo is full. * @@ -250,10 +291,44 @@ static unsigned long SA5_completed(ctlr_info_t *h) return ( register_value); } + +/* Performant mode command completed */ +static unsigned long SA5_performant_completed(ctlr_info_t *h) +{ + unsigned long register_value = FIFO_EMPTY; + + /* flush the controller write of the reply queue by reading + * outbound doorbell status register. + */ + register_value = readl(h->vaddr + SA5_OUTDB_STATUS); + /* msi auto clears the interrupt pending bit. */ + if (!(h->msi_vector || h->msix_vector)) { + writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); + /* Do a read in order to flush the write to the controller + * (as per spec.) + */ + register_value = readl(h->vaddr + SA5_OUTDB_STATUS); + } + + if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) { + register_value = *(h->reply_pool_head); + (h->reply_pool_head)++; + h->commands_outstanding--; + } else { + register_value = FIFO_EMPTY; + } + /* Check for wraparound */ + if (h->reply_pool_head == (h->reply_pool + h->max_commands)) { + h->reply_pool_head = h->reply_pool; + h->reply_pool_wraparound ^= 1; + } + + return register_value; +} /* * Returns true if an interrupt is pending.. */ -static unsigned long SA5_intr_pending(ctlr_info_t *h) +static bool SA5_intr_pending(ctlr_info_t *h) { unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS); @@ -268,7 +343,7 @@ static unsigned long SA5_intr_pending(ctlr_info_t *h) /* * Returns true if an interrupt is pending.. */ -static unsigned long SA5B_intr_pending(ctlr_info_t *h) +static bool SA5B_intr_pending(ctlr_info_t *h) { unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS); @@ -280,6 +355,20 @@ static unsigned long SA5B_intr_pending(ctlr_info_t *h) return 0 ; } +static bool SA5_performant_intr_pending(ctlr_info_t *h) +{ + unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS); + + if (!register_value) + return false; + + if (h->msi_vector || h->msix_vector) + return true; + + /* Read outbound doorbell to flush */ + register_value = readl(h->vaddr + SA5_OUTDB_STATUS); + return register_value & SA5_OUTDB_STATUS_PERF_BIT; +} static struct access_method SA5_access = { SA5_submit_command, @@ -297,6 +386,14 @@ static struct access_method SA5B_access = { SA5_completed, }; +static struct access_method SA5_performant_access = { + SA5_submit_command, + SA5_performant_intr_mask, + SA5_fifo_full, + SA5_performant_intr_pending, + SA5_performant_completed, +}; + struct board_type { __u32 board_id; char *product_name; diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index e624ff959cb6..936b9666da6a 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -54,6 +54,7 @@ #define CFGTBL_AccCmds 0x00000001l #define CFGTBL_Trans_Simple 0x00000002l +#define CFGTBL_Trans_Performant 0x00000004l #define CFGTBL_BusType_Ultra2 0x00000001l #define CFGTBL_BusType_Ultra3 0x00000002l @@ -173,12 +174,15 @@ typedef struct _SGDescriptor_struct { * PAD_64 can be adjusted independently as needed for 32-bit * and 64-bits systems. */ -#define COMMANDLIST_ALIGNMENT (8) +#define COMMANDLIST_ALIGNMENT (32) #define IS_64_BIT ((sizeof(long) - 4)/4) #define IS_32_BIT (!IS_64_BIT) #define PAD_32 (0) #define PAD_64 (4) #define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64) +#define DIRECT_LOOKUP_BIT 0x10 +#define DIRECT_LOOKUP_SHIFT 5 + typedef struct _CommandList_struct { CommandListHeader_struct Header; RequestBlock_struct Request; @@ -195,7 +199,7 @@ typedef struct _CommandList_struct { struct completion *waiting; int retry_count; void * scsi_cmd; - char pad[PADSIZE]; + char pad[PADSIZE]; } CommandList_struct; /* Configuration Table Structure */ @@ -209,12 +213,15 @@ typedef struct _HostWrite_struct { typedef struct _CfgTable_struct { BYTE Signature[4]; DWORD SpecValence; +#define SIMPLE_MODE 0x02 +#define PERFORMANT_MODE 0x04 +#define MEMQ_MODE 0x08 DWORD TransportSupport; DWORD TransportActive; HostWrite_struct HostWrite; DWORD CmdsOutMax; DWORD BusTypes; - DWORD Reserved; + DWORD TransMethodOffset; BYTE ServerName[16]; DWORD HeartBeat; DWORD SCSI_Prefetch; @@ -222,6 +229,25 @@ typedef struct _CfgTable_struct { DWORD MaxLogicalUnits; DWORD MaxPhysicalDrives; DWORD MaxPhysicalDrivesPerLogicalUnit; + DWORD MaxPerformantModeCommands; } CfgTable_struct; + +struct TransTable_struct { + u32 BlockFetch0; + u32 BlockFetch1; + u32 BlockFetch2; + u32 BlockFetch3; + u32 BlockFetch4; + u32 BlockFetch5; + u32 BlockFetch6; + u32 BlockFetch7; + u32 RepQSize; + u32 RepQCount; + u32 RepQCtrAddrLow32; + u32 RepQCtrAddrHigh32; + u32 RepQAddr0Low32; + u32 RepQAddr0High32; +}; + #pragma pack() #endif /* CCISS_CMD_H */ diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 3381505c8a6c..8e0a709286df 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -93,8 +93,8 @@ static struct scsi_host_template cciss_driver_template = { #pragma pack(1) -#define SCSI_PAD_32 0 -#define SCSI_PAD_64 0 +#define SCSI_PAD_32 8 +#define SCSI_PAD_64 8 struct cciss_scsi_cmd_stack_elem_t { CommandList_struct cmd; @@ -861,6 +861,7 @@ cciss_scsi_detect(int ctlr) sh->n_io_port = 0; // I don't think we use these two... sh->this_id = SELF_SCSI_ID; sh->sg_tablesize = hba[ctlr]->maxsgentries; + sh->max_cmd_len = MAX_COMMAND_SIZE; ((struct cciss_scsi_adapter_data_t *) hba[ctlr]->scsi_ctlr)->scsi_host = sh; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 91d11631cec9..abb4ec6690fc 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -386,7 +386,7 @@ static void __devexit cpqarray_remove_one_eisa (int i) } /* pdev is NULL for eisa */ -static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) +static int __devinit cpqarray_register_ctlr( int i, struct pci_dev *pdev) { struct request_queue *q; int j; @@ -503,7 +503,7 @@ Enomem4: return -1; } -static int __init cpqarray_init_one( struct pci_dev *pdev, +static int __devinit cpqarray_init_one( struct pci_dev *pdev, const struct pci_device_id *ent) { int i; @@ -740,7 +740,7 @@ __setup("smart2=", cpqarray_setup); /* * Find an EISA controller's signature. Set up an hba if we find it. */ -static int __init cpqarray_eisa_detect(void) +static int __devinit cpqarray_eisa_detect(void) { int i=0, j; __u32 board_id; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6b077f93acc6..7258c95e895e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1236,8 +1236,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Last part of the attaching process ... */ if (ns.conn >= C_CONNECTED && os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ - mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); drbd_send_state(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 632e3245d1bb..2151f18b21de 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1114,6 +1114,12 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->new_state_tmp.i = ns.i; ns.i = os.i; ns.disk = D_NEGOTIATING; + + /* We expect to receive up-to-date UUIDs soon. + To avoid a race in receive_state, free p_uuid while + holding req_lock. I.e. atomic with the state change */ + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; } rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 90c4038702da..82c30f9f81ca 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -514,8 +514,6 @@ static unsigned long fdc_busy; static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_WAIT_QUEUE_HEAD(command_done); -#define NO_SIGNAL (!interruptible || !signal_pending(current)) - /* Errors during formatting are counted here. */ static int format_errors; @@ -578,7 +576,7 @@ static void reset_fdc(void); #define NEED_1_RECAL -2 #define NEED_2_RECAL -3 -static int usage_count; +static atomic_t usage_count = ATOMIC_INIT(0); /* buffer related variables */ static int buffer_track = -1; @@ -858,36 +856,15 @@ static void set_fdc(int drive) } /* locks the driver */ -static int _lock_fdc(int drive, bool interruptible, int line) +static int lock_fdc(int drive, bool interruptible) { - if (!usage_count) { - pr_err("Trying to lock fdc while usage count=0 at line %d\n", - line); + if (WARN(atomic_read(&usage_count) == 0, + "Trying to lock fdc while usage count=0\n")) return -1; - } - - if (test_and_set_bit(0, &fdc_busy)) { - DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&fdc_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!test_and_set_bit(0, &fdc_busy)) - break; - - schedule(); - - if (!NO_SIGNAL) { - remove_wait_queue(&fdc_wait, &wait); - return -EINTR; - } - } + if (wait_event_interruptible(fdc_wait, !test_and_set_bit(0, &fdc_busy))) + return -EINTR; - set_current_state(TASK_RUNNING); - remove_wait_queue(&fdc_wait, &wait); - flush_scheduled_work(); - } command_status = FD_COMMAND_NONE; __reschedule_timeout(drive, "lock fdc"); @@ -895,11 +872,8 @@ static int _lock_fdc(int drive, bool interruptible, int line) return 0; } -#define lock_fdc(drive, interruptible) \ - _lock_fdc(drive, interruptible, __LINE__) - /* unlocks the driver */ -static inline void unlock_fdc(void) +static void unlock_fdc(void) { unsigned long flags; @@ -1224,7 +1198,7 @@ static int need_more_output(void) /* Set perpendicular mode as required, based on data rate, if supported. * 82077 Now tested. 1Mbps data rate only possible with 82077-1. */ -static inline void perpendicular_mode(void) +static void perpendicular_mode(void) { unsigned char perp_mode; @@ -2015,25 +1989,10 @@ static int wait_til_done(void (*handler)(void), bool interruptible) schedule_bh(handler); - if (command_status < 2 && NO_SIGNAL) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&command_done, &wait); - for (;;) { - set_current_state(interruptible ? - TASK_INTERRUPTIBLE : - TASK_UNINTERRUPTIBLE); - - if (command_status >= 2 || !NO_SIGNAL) - break; - - is_alive(__func__, ""); - schedule(); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&command_done, &wait); - } + if (interruptible) + wait_event_interruptible(command_done, command_status >= 2); + else + wait_event(command_done, command_status >= 2); if (command_status < 2) { cancel_activity(); @@ -2583,10 +2542,8 @@ static int make_raw_rw_request(void) int tracksize; int ssize; - if (max_buffer_sectors == 0) { - pr_info("VFS: Block I/O scheduled on unopened device\n"); + if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n")) return 0; - } set_fdc((long)current_req->rq_disk->private_data); @@ -2936,19 +2893,16 @@ static void process_fd_request(void) static void do_fd_request(struct request_queue *q) { - if (max_buffer_sectors == 0) { - pr_info("VFS: %s called on non-open device\n", __func__); + if (WARN(max_buffer_sectors == 0, + "VFS: %s called on non-open device\n", __func__)) return; - } - if (usage_count == 0) { - pr_info("warning: usage count=0, current_req=%p exiting\n", - current_req); - pr_info("sect=%ld type=%x flags=%x\n", - (long)blk_rq_pos(current_req), current_req->cmd_type, - current_req->cmd_flags); + if (WARN(atomic_read(&usage_count) == 0, + "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n", + current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, + current_req->cmd_flags)) return; - } + if (test_bit(0, &fdc_busy)) { /* fdc busy, this new request will be treated when the current one is done */ @@ -3033,7 +2987,7 @@ static inline int fd_copyin(void __user *param, void *address, return copy_from_user(address, param, size) ? -EFAULT : 0; } -static inline const char *drive_name(int type, int drive) +static const char *drive_name(int type, int drive) { struct floppy_struct *floppy; @@ -3103,7 +3057,7 @@ static struct cont_t raw_cmd_cont = { .done = raw_cmd_done }; -static inline int raw_cmd_copyout(int cmd, void __user *param, +static int raw_cmd_copyout(int cmd, void __user *param, struct floppy_raw_cmd *ptr) { int ret; @@ -3148,7 +3102,7 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } -static inline int raw_cmd_copyin(int cmd, void __user *param, +static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) { struct floppy_raw_cmd *ptr; @@ -3266,7 +3220,7 @@ static int invalidate_drive(struct block_device *bdev) return 0; } -static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, +static int set_geometry(unsigned int cmd, struct floppy_struct *g, int drive, int type, struct block_device *bdev) { int cnt; @@ -3337,7 +3291,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, } /* handle obsolete ioctl's */ -static int ioctl_table[] = { +static unsigned int ioctl_table[] = { FDCLRPRM, FDSETPRM, FDDEFPRM, @@ -3365,7 +3319,7 @@ static int ioctl_table[] = { FDTWADDLE }; -static inline int normalize_ioctl(int *cmd, int *size) +static int normalize_ioctl(unsigned int *cmd, int *size) { int i; @@ -3829,6 +3783,7 @@ static int __floppy_read_block_0(struct block_device *bdev) bio.bi_size = size; bio.bi_bdev = bdev; bio.bi_sector = 0; + bio.bi_flags = BIO_QUIET; init_completion(&complete); bio.bi_private = &complete; bio.bi_end_io = floppy_rb0_complete; @@ -3857,10 +3812,10 @@ static int floppy_revalidate(struct gendisk *disk) if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || test_bit(FD_VERIFY_BIT, &UDRS->flags) || test_bit(drive, &fake_change) || NO_GEOM) { - if (usage_count == 0) { - pr_info("VFS: revalidate called on non-open device.\n"); + if (WARN(atomic_read(&usage_count) == 0, + "VFS: revalidate called on non-open device.\n")) return -EFAULT; - } + lock_fdc(drive, false); cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) || test_bit(FD_VERIFY_BIT, &UDRS->flags)); @@ -4126,7 +4081,7 @@ static ssize_t floppy_cmos_show(struct device *dev, return sprintf(buf, "%X\n", UDP->cmos); } -DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL); +static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL); static void floppy_device_release(struct device *dev) { @@ -4175,6 +4130,9 @@ static int __init floppy_init(void) int i, unit, drive; int err, dr; + set_debugt(); + interruptjiffies = resultjiffies = jiffies; + #if defined(CONFIG_PPC) if (check_legacy_ioport(FDC1)) return -ENODEV; @@ -4353,7 +4311,7 @@ out_unreg_platform_dev: platform_device_unregister(&floppy_device[drive]); out_flush_work: flush_scheduled_work(); - if (usage_count) + if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); out_unreg_region: blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); @@ -4370,8 +4328,6 @@ out_put_disk: return err; } -static DEFINE_SPINLOCK(floppy_usage_lock); - static const struct io_region { int offset; int size; @@ -4417,14 +4373,8 @@ static void floppy_release_regions(int fdc) static int floppy_grab_irq_and_dma(void) { - unsigned long flags; - - spin_lock_irqsave(&floppy_usage_lock, flags); - if (usage_count++) { - spin_unlock_irqrestore(&floppy_usage_lock, flags); + if (atomic_inc_return(&usage_count) > 1) return 0; - } - spin_unlock_irqrestore(&floppy_usage_lock, flags); /* * We might have scheduled a free_irq(), wait it to @@ -4435,9 +4385,7 @@ static int floppy_grab_irq_and_dma(void) if (fd_request_irq()) { DPRINT("Unable to grab IRQ%d for the floppy driver\n", FLOPPY_IRQ); - spin_lock_irqsave(&floppy_usage_lock, flags); - usage_count--; - spin_unlock_irqrestore(&floppy_usage_lock, flags); + atomic_dec(&usage_count); return -1; } if (fd_request_dma()) { @@ -4447,9 +4395,7 @@ static int floppy_grab_irq_and_dma(void) use_virtual_dma = can_use_virtual_dma = 1; if (!(can_use_virtual_dma & 1)) { fd_free_irq(); - spin_lock_irqsave(&floppy_usage_lock, flags); - usage_count--; - spin_unlock_irqrestore(&floppy_usage_lock, flags); + atomic_dec(&usage_count); return -1; } } @@ -4484,9 +4430,7 @@ cleanup: fd_free_dma(); while (--fdc >= 0) floppy_release_regions(fdc); - spin_lock_irqsave(&floppy_usage_lock, flags); - usage_count--; - spin_unlock_irqrestore(&floppy_usage_lock, flags); + atomic_dec(&usage_count); return -1; } @@ -4498,14 +4442,10 @@ static void floppy_release_irq_and_dma(void) #endif long tmpsize; unsigned long tmpaddr; - unsigned long flags; - spin_lock_irqsave(&floppy_usage_lock, flags); - if (--usage_count) { - spin_unlock_irqrestore(&floppy_usage_lock, flags); + if (!atomic_dec_and_test(&usage_count)) return; - } - spin_unlock_irqrestore(&floppy_usage_lock, flags); + if (irqdma_allocated) { fd_disable_dma(); fd_free_dma(); @@ -4598,7 +4538,7 @@ static void __exit floppy_module_exit(void) del_timer_sync(&fd_timer); blk_cleanup_queue(floppy_queue); - if (usage_count) + if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); /* eject disk, if any */ diff --git a/fs/afs/write.c b/fs/afs/write.c index 3dab9e9948d0..722743b152d8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) { struct address_space *mapping = vnode->vfs_inode.i_mapping; struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_cyclic = 1, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a4080c21ec55..d74e6af9b53a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, .sync_mode = wbc->sync_mode, .older_than_this = NULL, .nr_to_write = 64, @@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, .sync_io = mode == WB_SYNC_ALL, }; struct writeback_control wbc_writepages = { - .bdi = inode->i_mapping->backing_dev_info, .sync_mode = mode, .older_than_this = NULL, .nr_to_write = nr_pages * 2, diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1d1088f48bc2..5455009b4142 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -63,24 +63,16 @@ struct bdi_work { }; enum { - WS_USED_B = 0, - WS_ONSTACK_B, + WS_INPROGRESS = 0, + WS_ONSTACK, }; -#define WS_USED (1 << WS_USED_B) -#define WS_ONSTACK (1 << WS_ONSTACK_B) - -static inline bool bdi_work_on_stack(struct bdi_work *work) -{ - return test_bit(WS_ONSTACK_B, &work->state); -} - static inline void bdi_work_init(struct bdi_work *work, struct wb_writeback_args *args) { INIT_RCU_HEAD(&work->rcu_head); work->args = *args; - work->state = WS_USED; + __set_bit(WS_INPROGRESS, &work->state); } /** @@ -95,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi) return !list_empty(&bdi->work_list); } -static void bdi_work_clear(struct bdi_work *work) -{ - clear_bit(WS_USED_B, &work->state); - smp_mb__after_clear_bit(); - /* - * work can have disappeared at this point. bit waitq functions - * should be able to tolerate this, provided bdi_sched_wait does - * not dereference it's pointer argument. - */ - wake_up_bit(&work->state, WS_USED_B); -} - static void bdi_work_free(struct rcu_head *head) { struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); - if (!bdi_work_on_stack(work)) - kfree(work); - else - bdi_work_clear(work); -} - -static void wb_work_complete(struct bdi_work *work) -{ - const enum writeback_sync_modes sync_mode = work->args.sync_mode; - int onstack = bdi_work_on_stack(work); + clear_bit(WS_INPROGRESS, &work->state); + smp_mb__after_clear_bit(); + wake_up_bit(&work->state, WS_INPROGRESS); - /* - * For allocated work, we can clear the done/seen bit right here. - * For on-stack work, we need to postpone both the clear and free - * to after the RCU grace period, since the stack could be invalidated - * as soon as bdi_work_clear() has done the wakeup. - */ - if (!onstack) - bdi_work_clear(work); - if (sync_mode == WB_SYNC_NONE || onstack) - call_rcu(&work->rcu_head, bdi_work_free); + if (!test_bit(WS_ONSTACK, &work->state)) + kfree(work); } static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) @@ -147,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) list_del_rcu(&work->list); spin_unlock(&bdi->wb_lock); - wb_work_complete(work); + call_rcu(&work->rcu_head, bdi_work_free); } } @@ -185,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) * Used for on-stack allocated work items. The caller needs to wait until * the wb threads have acked the work before it's safe to continue. */ -static void bdi_wait_on_work_clear(struct bdi_work *work) +static void bdi_wait_on_work_done(struct bdi_work *work) { - wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, + wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait, TASK_UNINTERRUPTIBLE); } @@ -213,37 +178,28 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, } /** - * bdi_sync_writeback - start and wait for writeback - * @bdi: the backing device to write from + * bdi_queue_work_onstack - start and wait for writeback * @sb: write inodes from this super_block * * Description: - * This does WB_SYNC_ALL data integrity writeback and waits for the - * IO to complete. Callers must hold the sb s_umount semaphore for + * This function initiates writeback and waits for the operation to + * complete. Callers must hold the sb s_umount semaphore for * reading, to avoid having the super disappear before we are done. */ -static void bdi_sync_writeback(struct backing_dev_info *bdi, - struct super_block *sb) +static void bdi_queue_work_onstack(struct wb_writeback_args *args) { - struct wb_writeback_args args = { - .sb = sb, - .sync_mode = WB_SYNC_ALL, - .nr_pages = LONG_MAX, - .range_cyclic = 0, - }; struct bdi_work work; - bdi_work_init(&work, &args); - work.state |= WS_ONSTACK; + bdi_work_init(&work, args); + __set_bit(WS_ONSTACK, &work.state); - bdi_queue_work(bdi, &work); - bdi_wait_on_work_clear(&work); + bdi_queue_work(args->sb->s_bdi, &work); + bdi_wait_on_work_done(&work); } /** * bdi_start_writeback - start writeback * @bdi: the backing device to write from - * @sb: write inodes from this super_block * @nr_pages: the number of pages to write * * Description: @@ -252,25 +208,34 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * completion. Caller need not hold sb s_umount semaphore. * */ -void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) { struct wb_writeback_args args = { - .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, }; - /* - * We treat @nr_pages=0 as the special case to do background writeback, - * ie. to sync pages until the background dirty threshold is reached. - */ - if (!nr_pages) { - args.nr_pages = LONG_MAX; - args.for_background = 1; - } + bdi_alloc_queue_work(bdi, &args); +} +/** + * bdi_start_background_writeback - start background writeback + * @bdi: the backing device to write from + * + * Description: + * This does WB_SYNC_NONE background writeback. The IO is only + * started when this function returns, we make no guarentees on + * completion. Caller need not hold sb s_umount semaphore. + */ +void bdi_start_background_writeback(struct backing_dev_info *bdi) +{ + struct wb_writeback_args args = { + .sync_mode = WB_SYNC_NONE, + .nr_pages = LONG_MAX, + .for_background = 1, + .range_cyclic = 1, + }; bdi_alloc_queue_work(bdi, &args); } @@ -561,75 +526,69 @@ select_queue: return ret; } -static void unpin_sb_for_writeback(struct super_block *sb) -{ - up_read(&sb->s_umount); - put_super(sb); -} - -enum sb_pin_state { - SB_PINNED, - SB_NOT_PINNED, - SB_PIN_FAILED -}; - /* - * For WB_SYNC_NONE writeback, the caller does not have the sb pinned + * For background writeback the caller does not have the sb pinned * before calling writeback. So make sure that we do pin it, so it doesn't * go away while we are writing inodes from it. */ -static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, - struct super_block *sb) +static bool pin_sb_for_writeback(struct super_block *sb) { - /* - * Caller must already hold the ref for this - */ - if (wbc->sync_mode == WB_SYNC_ALL) { - WARN_ON(!rwsem_is_locked(&sb->s_umount)); - return SB_NOT_PINNED; - } spin_lock(&sb_lock); + if (list_empty(&sb->s_instances)) { + spin_unlock(&sb_lock); + return false; + } + sb->s_count++; + spin_unlock(&sb_lock); + if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) { - spin_unlock(&sb_lock); - return SB_PINNED; - } - /* - * umounted, drop rwsem again and fall through to failure - */ + if (sb->s_root) + return true; up_read(&sb->s_umount); } - sb->s_count--; - spin_unlock(&sb_lock); - return SB_PIN_FAILED; + + put_super(sb); + return false; } /* * Write a portion of b_io inodes which belong to @sb. - * If @wbc->sb != NULL, then find and write all such + * + * If @only_this_sb is true, then find and write all such * inodes. Otherwise write only ones which go sequentially * in reverse order. + * * Return 1, if the caller writeback routine should be * interrupted. Otherwise return 0. */ -static int writeback_sb_inodes(struct super_block *sb, - struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, + struct writeback_control *wbc, bool only_this_sb) { while (!list_empty(&wb->b_io)) { long pages_skipped; struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); - if (wbc->sb && sb != inode->i_sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - if (sb != inode->i_sb) - /* finish with this superblock */ + + if (inode->i_sb != sb) { + if (only_this_sb) { + /* + * We only want to write back data for this + * superblock, move all inodes not belonging + * to it back onto the dirty list. + */ + redirty_tail(inode); + continue; + } + + /* + * The inode belongs to a different superblock. + * Bounce back to the caller to unpin this and + * pin the next superblock. + */ return 0; + } + if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; @@ -667,8 +626,8 @@ static int writeback_sb_inodes(struct super_block *sb, return 1; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) { int ret = 0; @@ -681,24 +640,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, struct inode *inode = list_entry(wb->b_io.prev, struct inode, i_list); struct super_block *sb = inode->i_sb; - enum sb_pin_state state; - if (wbc->sb && sb != wbc->sb) { - /* super block given and doesn't - match, skip this inode */ - redirty_tail(inode); - continue; - } - state = pin_sb_for_writeback(wbc, sb); - - if (state == SB_PIN_FAILED) { + if (!pin_sb_for_writeback(sb)) { requeue_io(inode); continue; } - ret = writeback_sb_inodes(sb, wb, wbc); + ret = writeback_sb_inodes(sb, wb, wbc, false); + drop_super(sb); - if (state == SB_PINNED) - unpin_sb_for_writeback(sb); if (ret) break; } @@ -706,11 +655,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, /* Leave any unwritten inodes on b_io */ } -void writeback_inodes_wbc(struct writeback_control *wbc) +static void __writeback_inodes_sb(struct super_block *sb, + struct bdi_writeback *wb, struct writeback_control *wbc) { - struct backing_dev_info *bdi = wbc->bdi; + WARN_ON(!rwsem_is_locked(&sb->s_umount)); - writeback_inodes_wb(&bdi->wb, wbc); + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + writeback_sb_inodes(sb, wb, wbc, true); + spin_unlock(&inode_lock); } /* @@ -751,8 +706,6 @@ static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_args *args) { struct writeback_control wbc = { - .bdi = wb->bdi, - .sb = args->sb, .sync_mode = args->sync_mode, .older_than_this = NULL, .for_kupdate = args->for_kupdate, @@ -790,7 +743,10 @@ static long wb_writeback(struct bdi_writeback *wb, wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; - writeback_inodes_wb(wb, &wbc); + if (args->sb) + __writeback_inodes_sb(args->sb, wb, &wbc); + else + writeback_inodes_wb(wb, &wbc); args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; @@ -911,7 +867,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * If this isn't a data integrity operation, just notify * that we have seen this work and we are now starting it. */ - if (args.sync_mode == WB_SYNC_NONE) + if (!test_bit(WS_ONSTACK, &work->state)) wb_clear_pending(wb, work); wrote += wb_writeback(wb, &args); @@ -920,7 +876,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * This is a data integrity writeback, so only do the * notification when we have completed the work. */ - if (args.sync_mode == WB_SYNC_ALL) + if (test_bit(WS_ONSTACK, &work->state)) wb_clear_pending(wb, work); } @@ -978,42 +934,32 @@ int bdi_writeback_task(struct bdi_writeback *wb) } /* - * Schedule writeback for all backing devices. This does WB_SYNC_NONE - * writeback, for integrity writeback see bdi_sync_writeback(). + * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back + * the whole world. */ -static void bdi_writeback_all(struct super_block *sb, long nr_pages) +void wakeup_flusher_threads(long nr_pages) { + struct backing_dev_info *bdi; struct wb_writeback_args args = { - .sb = sb, - .nr_pages = nr_pages, .sync_mode = WB_SYNC_NONE, }; - struct backing_dev_info *bdi; - rcu_read_lock(); + if (nr_pages) { + args.nr_pages = nr_pages; + } else { + args.nr_pages = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); + } + rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - bdi_alloc_queue_work(bdi, &args); } - rcu_read_unlock(); } -/* - * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back - * the whole world. - */ -void wakeup_flusher_threads(long nr_pages) -{ - if (nr_pages == 0) - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - bdi_writeback_all(NULL, nr_pages); -} - static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1218,12 +1164,17 @@ void writeback_inodes_sb(struct super_block *sb) { unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; + struct wb_writeback_args args = { + .sb = sb, + .sync_mode = WB_SYNC_NONE, + }; - nr_to_write = nr_dirty + nr_unstable + + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + args.nr_pages = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_start_writeback(sb->s_bdi, sb, nr_to_write); + bdi_queue_work_onstack(&args); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1237,7 +1188,9 @@ EXPORT_SYMBOL(writeback_inodes_sb); int writeback_inodes_sb_if_idle(struct super_block *sb) { if (!writeback_in_progress(sb->s_bdi)) { + down_read(&sb->s_umount); writeback_inodes_sb(sb); + up_read(&sb->s_umount); return 1; } else return 0; @@ -1253,7 +1206,16 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); */ void sync_inodes_sb(struct super_block *sb) { - bdi_sync_writeback(sb->s_bdi, sb); + struct wb_writeback_args args = { + .sb = sb, + .sync_mode = WB_SYNC_ALL, + .nr_pages = LONG_MAX, + .range_cyclic = 0, + }; + + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + bdi_queue_work_onstack(&args); wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 076ca50e9933..c8ff0d1ae5d3 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -62,7 +62,9 @@ */ static void shrink_liability(struct ubifs_info *c, int nr_to_write) { + down_read(&c->vfs_sb->s_umount); writeback_inodes_sb(c->vfs_sb); + up_read(&c->vfs_sb->s_umount); } /** diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aee5f6ce166e..9ae2889096b6 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -105,8 +105,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); -void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages); +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); +void bdi_start_background_writeback(struct backing_dev_info *bdi); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 09a840264d6f..b8224ea4a5de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -467,11 +467,13 @@ struct request_queue #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ - (1 << QUEUE_FLAG_SAME_COMP)) + (1 << QUEUE_FLAG_SAME_COMP) | \ + (1 << QUEUE_FLAG_ADD_RANDOM)) static inline int queue_is_locked(struct request_queue *q) { @@ -596,6 +598,7 @@ enum { test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) +#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 30da4ae48972..b8d2516668aa 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8rc2" +#define REL_VERSION "8.3.8" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 94 diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d63ef8f9609f..c24eca71e80c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -27,10 +27,6 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - struct backing_dev_info *bdi; /* If !NULL, only write back this - queue */ - struct super_block *sb; /* if !NULL, only write inodes from - this super_block */ enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ @@ -66,7 +62,8 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); -void writeback_inodes_wbc(struct writeback_control *wbc); +void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 660a87a22511..6e0b09a1ec2c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -340,14 +340,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) static void bdi_flush_io(struct backing_dev_info *bdi) { struct writeback_control wbc = { - .bdi = bdi, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .range_cyclic = 1, .nr_to_write = 1024, }; - writeback_inodes_wbc(&wbc); + writeback_inodes_wb(&bdi->wb, &wbc); } /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index bbd396ac9546..37498ef61548 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping, for (;;) { struct writeback_control wbc = { - .bdi = bdi, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, @@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping, * up. */ if (bdi_nr_reclaimable > bdi_thresh) { - writeback_inodes_wbc(&wbc); + writeback_inodes_wb(&bdi->wb, &wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); @@ -597,7 +596,7 @@ static void balance_dirty_pages(struct address_space *mapping, (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, NULL, 0); + bdi_start_background_writeback(bdi); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -705,9 +704,8 @@ void laptop_mode_timer_fn(unsigned long data) * We want to write everything out, not just down to the dirty * threshold */ - if (bdi_has_dirty_io(&q->backing_dev_info)) - bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); + bdi_start_writeback(&q->backing_dev_info, nr_pages); } /* |