1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
|
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHE_BACKINGDEV_H
#define _BCACHE_BACKINGDEV_H
#include <linux/bio.h>
#include <linux/closure.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/bcache_superblock.h>
#include <linux/bcache/ratelimit.h>
#include "stats.h"
#include "super.h"
struct search;
struct btree;
struct bcache_device {
struct closure cl;
struct kobject kobj;
struct cache_set *c;
struct bch_fs *c2;
u64 id;
struct inode *inode;
#define BCACHEDEVNAME_SIZE 12
char name[BCACHEDEVNAME_SIZE];
struct gendisk *disk;
unsigned long flags;
#define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned int nr_stripes;
unsigned int stripe_size;
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
struct bio_set bio_split;
unsigned int data_csum:1;
int (*cache_miss)(struct btree *b, struct search *s,
struct bio *bio, unsigned int sectors);
int (*ioctl)(struct bcache_device *d, fmode_t mode,
unsigned int cmd, unsigned long arg);
};
static inline bool bcache_dev_is_attached(struct bcache_device *d)
{
return d->c != NULL || d->c2 != NULL;
}
enum stop_on_failure {
BCH_CACHED_DEV_STOP_AUTO = 0,
BCH_CACHED_DEV_STOP_ALWAYS,
BCH_CACHED_DEV_STOP_MODE_MAX,
};
struct io {
/* Used to track sequential IO so it can be skipped */
struct hlist_node hash;
struct list_head lru;
unsigned long jiffies;
unsigned int sequential;
sector_t last;
};
struct cached_dev {
struct list_head list;
struct bcache_device disk;
struct block_device *bdev;
struct cache_sb sb;
struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct closure sb_write;
struct semaphore sb_write_mutex;
/* Refcount on the cache set. Always nonzero when we're caching. */
refcount_t count;
struct work_struct detach;
/*
* Device might not be running if it's dirty and the cache set hasn't
* showed up yet.
*/
atomic_t running;
struct bio_set bch2_bio_read;
mempool_t bch2_io_write;
/*
* Writes take a shared lock from start to finish; scanning for dirty
* data to refill the rb tree requires an exclusive lock.
*/
struct rw_semaphore writeback_lock;
/*
* Nonzero, and writeback has a refcount (d->count), iff there is dirty
* data in the cache. Protected by writeback_lock; must have an
* shared lock to set and exclusive lock to clear.
*/
atomic_t has_dirty;
#define BCH_CACHE_READA_ALL 0
#define BCH_CACHE_READA_META_ONLY 1
unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
/* Limit number of writeback bios in flight */
struct semaphore in_flight;
struct task_struct *writeback_thread;
struct workqueue_struct *writeback_write_wq;
struct keybuf *writeback_keys;
struct task_struct *status_update_thread;
/*
* Order the write-half of writeback operations strongly in dispatch
* order. (Maintain LBA order; don't allow reads completing out of
* order to re-order the writes...)
*/
struct closure_waitlist writeback_ordering_wait;
atomic_t writeback_sequence_next;
/* For tracking sequential IO */
#define RECENT_IO_BITS 7
#define RECENT_IO (1 << RECENT_IO_BITS)
struct io io[RECENT_IO];
struct hlist_head io_hash[RECENT_IO + 1];
struct list_head io_lru;
spinlock_t io_lock;
struct cache_accounting accounting;
/* The rest of this all shows up in sysfs */
unsigned int sequential_cutoff;
unsigned int readahead;
unsigned int io_disable:1;
unsigned int verify:1;
unsigned int bypass_torture_test:1;
unsigned int partial_stripes_expensive:1;
unsigned int writeback_metadata:1;
unsigned int writeback_running:1;
unsigned char writeback_percent;
unsigned int writeback_delay;
uint64_t writeback_rate_target;
int64_t writeback_rate_proportional;
int64_t writeback_rate_integral;
int64_t writeback_rate_integral_scaled;
int32_t writeback_rate_change;
unsigned int writeback_rate_update_seconds;
unsigned int writeback_rate_i_term_inverse;
unsigned int writeback_rate_p_term_inverse;
unsigned int writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
atomic_t io_errors;
unsigned int error_limit;
unsigned int offline_seconds;
char backing_dev_name[BDEVNAME_SIZE];
};
static inline unsigned int cache_mode(struct cached_dev *dc)
{
return BDEV_CACHE_MODE(&dc->sb);
}
extern unsigned int bch_cutoff_writeback;
extern unsigned int bch_cutoff_writeback_sync;
static inline void cached_dev_put(struct cached_dev *dc)
{
if (refcount_dec_and_test(&dc->count))
schedule_work(&dc->detach);
}
static inline bool cached_dev_get(struct cached_dev *dc)
{
if (!refcount_inc_not_zero(&dc->count))
return false;
/* Paired with the mb in cached_dev_attach */
smp_mb__after_atomic();
return true;
}
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
{
uint64_t i, ret = 0;
for (i = 0; i < d->nr_stripes; i++)
ret += atomic_read(d->stripe_sectors_dirty + i);
return ret;
}
static inline unsigned int offset_to_stripe(struct bcache_device *d,
uint64_t offset)
{
do_div(offset, d->stripe_size);
return offset;
}
static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
uint64_t offset,
unsigned int nr_sectors)
{
unsigned int stripe = offset_to_stripe(&dc->disk, offset);
while (1) {
if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
return true;
if (nr_sectors <= dc->disk.stripe_size)
return false;
nr_sectors -= dc->disk.stripe_size;
stripe++;
}
}
static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
unsigned int cache_mode, bool would_skip,
unsigned int in_use)
{
if (cache_mode != CACHE_MODE_WRITEBACK ||
test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
in_use > bch_cutoff_writeback_sync)
return false;
if (bio_op(bio) == REQ_OP_DISCARD)
return false;
if (dc->partial_stripes_expensive &&
bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
bio_sectors(bio)))
return true;
if (would_skip)
return false;
return (op_is_sync(bio->bi_opf) ||
bio->bi_opf & (REQ_META|REQ_PRIO) ||
in_use <= bch_cutoff_writeback);
}
static inline void bch_writeback_queue(struct cached_dev *dc)
{
if (!IS_ERR_OR_NULL(dc->writeback_thread))
wake_up_process(dc->writeback_thread);
}
static inline void bch_writeback_add(struct cached_dev *dc)
{
if (!atomic_read(&dc->has_dirty) &&
!atomic_xchg(&dc->has_dirty, 1)) {
if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
/* XXX: should do this synchronously */
bch_write_bdev_super(dc, NULL);
}
bch_writeback_queue(dc);
}
}
#define CUTOFF_CACHE_ADD 95
#define CUTOFF_CACHE_READA 90
#endif /* _BCACHE_BACKINGDEV_H */
|