Separate out the concept of "queue congestion" from "backing-dev congestion".
Congestion is a backing-dev concept, not a queue concept.
The blk_* congestion functions are retained, as wrappers around the core
backing-dev congestion functions.
This proper layering is needed so that NFS can cleanly use the congestion
functions, and so that CONFIG_BLOCK=n actually links.
Cc: "Thomas Maier" <balagi@justmail.de>
Cc: "Jens Axboe" <jens.axboe@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: David Howells <dhowells@redhat.com>
Cc: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
#include <asm/mmx.h>
if (retval == -ENOMEM && is_init(current)) {
up_read(¤t->mm->mmap_sem);
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
goto survive;
}
*/
static kmem_cache_t *iocontext_cachep;
-static wait_queue_head_t congestion_wqh[2] = {
- __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
- __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
- };
-
/*
* Controlling structure to kblockd
*/
q->nr_congestion_off = nr;
}
-/*
- * A queue has just exitted congestion. Note this in the global counter of
- * congested queues, and wake up anyone who was waiting for requests to be
- * put back.
- */
-void blk_clear_queue_congested(request_queue_t *q, int rw)
-{
- enum bdi_state bit;
- wait_queue_head_t *wqh = &congestion_wqh[rw];
-
- bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
- clear_bit(bit, &q->backing_dev_info.state);
- smp_mb__after_clear_bit();
- if (waitqueue_active(wqh))
- wake_up(wqh);
-}
-EXPORT_SYMBOL(blk_clear_queue_congested);
-
-/*
- * A queue has just entered congestion. Flag that in the queue's VM-visible
- * state flags and increment the global gounter of congested queues.
- */
-void blk_set_queue_congested(request_queue_t *q, int rw)
-{
- enum bdi_state bit;
-
- bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
- set_bit(bit, &q->backing_dev_info.state);
-}
-EXPORT_SYMBOL(blk_set_queue_congested);
-
/**
* blk_get_backing_dev_info - get the address of a queue's backing_dev_info
* @bdev: device
}
EXPORT_SYMBOL(blk_end_sync_rq);
-/**
- * blk_congestion_wait - wait for a queue to become uncongested
- * @rw: READ or WRITE
- * @timeout: timeout in jiffies
- *
- * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
- * If no queues are congested then just wait for the next request to be
- * returned.
- */
-long blk_congestion_wait(int rw, long timeout)
-{
- long ret;
- DEFINE_WAIT(wait);
- wait_queue_head_t *wqh = &congestion_wqh[rw];
-
- prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
- ret = io_schedule_timeout(timeout);
- finish_wait(wqh, &wait);
- return ret;
-}
-
-EXPORT_SYMBOL(blk_congestion_wait);
-
-/**
- * blk_congestion_end - wake up sleepers on a congestion queue
- * @rw: READ or WRITE
- */
-void blk_congestion_end(int rw)
-{
- wait_queue_head_t *wqh = &congestion_wqh[rw];
-
- if (waitqueue_active(wqh))
- wake_up(wqh);
-}
-
/*
* Has to be called with the request spinlock acquired
*/
#include <linux/slab.h>
#include <linux/crypto.h>
#include <linux/workqueue.h>
+#include <linux/backing-dev.h>
#include <asm/atomic.h>
#include <linux/scatterlist.h>
#include <asm/page.h>
/* out of memory -> run queues */
if (remaining)
- blk_congestion_wait(bio_data_dir(clone), HZ/100);
+ congestion_wait(bio_data_dir(clone), HZ/100);
}
}
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/backing-dev.h>
#include <linux/blkdev.h>
int fat_generic_ioctl(struct inode *inode, struct file *filp,
if ((filp->f_mode & FMODE_WRITE) &&
MSDOS_SB(inode->i_sb)->options.flush) {
fat_flush_inodes(inode->i_sb, inode, NULL);
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
}
return 0;
}
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
+#include <linux/backing-dev.h>
+
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
out:
clear_bit(BDI_write_congested, &bdi->state);
wake_up_all(&nfs_write_congestion);
- writeback_congestion_end();
+ congestion_end(WRITE);
return err;
}
#include <linux/workqueue.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
DEFINE_WAIT(wait);
struct reiserfs_journal *j = SB_JOURNAL(s);
if (atomic_read(&j->j_async_throttle))
- blk_congestion_wait(WRITE, HZ / 10);
+ congestion_wait(WRITE, HZ / 10);
return 0;
}
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
#include "time.h"
#include "kmem.h"
printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
__FUNCTION__, lflags);
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
} while (1);
}
printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
__FUNCTION__, lflags);
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
} while (1);
}
#include <linux/hash.h>
#include <linux/kthread.h>
#include <linux/migrate.h>
+#include <linux/backing-dev.h>
#include "xfs_linux.h"
STATIC kmem_zone_t *xfs_buf_zone;
XFS_STATS_INC(xb_page_retries);
xfsbufd_wakeup(0, gfp_mask);
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
goto retry;
}
#include <asm/atomic.h>
+struct page;
+
/*
* Bits in backing_dev_info.state
*/
(1 << BDI_write_congested));
}
+void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
+void set_bdi_congested(struct backing_dev_info *bdi, int rw);
+long congestion_wait(int rw, long timeout);
+void congestion_end(int rw);
+
#define bdi_cap_writeback_dirty(bdi) \
(!((bdi)->capabilities & BDI_CAP_NO_WRITEBACK))
extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
extern int sg_scsi_ioctl(struct file *, struct request_queue *,
struct gendisk *, struct scsi_ioctl_command __user *);
-extern void blk_clear_queue_congested(request_queue_t *q, int rw);
-extern void blk_set_queue_congested(request_queue_t *q, int rw);
+
+/*
+ * A queue has just exitted congestion. Note this in the global counter of
+ * congested queues, and wake up anyone who was waiting for requests to be
+ * put back.
+ */
+static inline void blk_clear_queue_congested(request_queue_t *q, int rw)
+{
+ clear_bdi_congested(&q->backing_dev_info, rw);
+}
+
+/*
+ * A queue has just entered congestion. Flag that in the queue's VM-visible
+ * state flags and increment the global gounter of congested queues.
+ */
+static inline void blk_set_queue_congested(request_queue_t *q, int rw)
+{
+ set_bdi_congested(&q->backing_dev_info, rw);
+}
+
extern void blk_start_queue(request_queue_t *q);
extern void blk_stop_queue(request_queue_t *q);
extern void blk_sync_queue(struct request_queue *q);
extern void blk_queue_free_tags(request_queue_t *);
extern int blk_queue_resize_tags(request_queue_t *, int);
extern void blk_queue_invalidate_tags(request_queue_t *);
-extern long blk_congestion_wait(int rw, long timeout);
extern struct blk_queue_tag *blk_init_tags(int);
extern void blk_free_tags(struct blk_queue_tag *);
-extern void blk_congestion_end(int rw);
static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
int tag)
void laptop_io_completion(void);
void laptop_sync_completion(void);
void throttle_vm_writeout(void);
-void writeback_congestion_end(void);
/* These are exported to sysctl. */
extern int dirty_background_ratio;
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
- prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
+ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
+ $(mmu-y)
ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
obj-y += bounce.o
--- /dev/null
+
+#include <linux/wait.h>
+#include <linux/backing-dev.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+static wait_queue_head_t congestion_wqh[2] = {
+ __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
+ __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
+ };
+
+
+void clear_bdi_congested(struct backing_dev_info *bdi, int rw)
+{
+ enum bdi_state bit;
+ wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+ bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+ clear_bit(bit, &bdi->state);
+ smp_mb__after_clear_bit();
+ if (waitqueue_active(wqh))
+ wake_up(wqh);
+}
+EXPORT_SYMBOL(clear_bdi_congested);
+
+void set_bdi_congested(struct backing_dev_info *bdi, int rw)
+{
+ enum bdi_state bit;
+
+ bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+ set_bit(bit, &bdi->state);
+}
+EXPORT_SYMBOL(set_bdi_congested);
+
+/**
+ * congestion_wait - wait for a backing_dev to become uncongested
+ * @rw: READ or WRITE
+ * @timeout: timeout in jiffies
+ *
+ * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
+ * write congestion. If no backing_devs are congested then just wait for the
+ * next write to be completed.
+ */
+long congestion_wait(int rw, long timeout)
+{
+ long ret;
+ DEFINE_WAIT(wait);
+ wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+ prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+ ret = io_schedule_timeout(timeout);
+ finish_wait(wqh, &wait);
+ return ret;
+}
+EXPORT_SYMBOL(congestion_wait);
+
+/**
+ * congestion_end - wake up sleepers on a congested backing_dev_info
+ * @rw: READ or WRITE
+ */
+void congestion_end(int rw)
+{
+ wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+ if (waitqueue_active(wqh))
+ wake_up(wqh);
+}
+EXPORT_SYMBOL(congestion_end);
if (pages_written >= write_chunk)
break; /* We've done our duty */
}
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
}
if (nr_reclaimable + global_page_state(NR_WRITEBACK)
if (global_page_state(NR_UNSTABLE_NFS) +
global_page_state(NR_WRITEBACK) <= dirty_thresh)
break;
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
}
}
min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
/* Wrote less than expected */
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
if (!wbc.encountered_congestion)
break;
}
writeback_inodes(&wbc);
if (wbc.nr_to_write > 0) {
if (wbc.encountered_congestion)
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
else
break; /* All the old data is written */
}
}
EXPORT_SYMBOL(test_set_page_writeback);
-/*
- * Wakes up tasks that are being throttled due to writeback congestion
- */
-void writeback_congestion_end(void)
-{
- blk_congestion_end(WRITE);
-}
-EXPORT_SYMBOL(writeback_congestion_end);
-
/*
* Return true if any of the pages in the mapping are marged with the
* passed tag.
#include <linux/stop_machine.h>
#include <linux/sort.h>
#include <linux/pfn.h>
+#include <linux/backing-dev.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
if (page)
goto got_pg;
if (gfp_mask & __GFP_NOFAIL) {
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
goto nofail_alloc;
}
}
do_retry = 1;
}
if (do_retry) {
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
goto rebalance;
}
#include <linux/ctype.h>
#include <linux/migrate.h>
#include <linux/highmem.h>
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
page_cache_release(swappage);
if (error == -ENOMEM) {
/* let kswapd refresh zone for GFP_ATOMICs */
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
}
goto repeat;
}
/* Take a nap, wait for some writeback to complete */
if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
}
/* top priority shrink_caches still had more to do? don't OOM, then */
if (!sc.all_unreclaimable)
* another pass across the zones.
*/
if (total_scanned && priority < DEF_PRIORITY - 2)
- blk_congestion_wait(WRITE, HZ/10);
+ congestion_wait(WRITE, HZ/10);
/*
* We do this so kswapd doesn't build up large priorities for
goto out;
if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
- blk_congestion_wait(WRITE, HZ / 10);
+ congestion_wait(WRITE, HZ / 10);
}
lru_pages = 0;