summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2018-11-04 22:18:23 -0500
committer Kent Overstreet <kent.overstreet@gmail.com> 2018-11-04 22:21:00 -0500
commit db8cffeaed01c917863d3f12b0203615a303ab8e (patch)
tree c506b718c5c38dd1cb40030502ae565e548ce7b5
parent 6da91e81cc8b84e0efff09b40dffa004b4c0be90 (diff)
Update bcachefs sources to 8bf4b038d4 bcachefs: Assorted fixes for running on very small devices
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- libbcachefs.c 2
-rw-r--r-- libbcachefs/alloc_background.c 52
-rw-r--r-- libbcachefs/alloc_background.h 4
-rw-r--r-- libbcachefs/alloc_foreground.c 113
-rw-r--r-- libbcachefs/alloc_foreground.h 11
-rw-r--r-- libbcachefs/alloc_types.h 4
-rw-r--r-- libbcachefs/bcachefs.h 8
-rw-r--r-- libbcachefs/bcachefs_format.h 4
-rw-r--r-- libbcachefs/buckets.c 11
-rw-r--r-- libbcachefs/buckets.h 6
-rw-r--r-- libbcachefs/recovery.c 4
-rw-r--r-- libbcachefs/super-io.c 2
-rw-r--r-- libbcachefs/super.c 3
14 files changed, 152 insertions, 74 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index b6371345..9f81e277 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-defaad6d47791d3e6285cba323f92847b6e4c226
+8bf4b038d41230504d3f0315a35e4d7a056e0a65
diff --git a/libbcachefs.c b/libbcachefs.c
index 3ce69d1b..c8738f40 100644
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -26,8 +26,6 @@
#define NSEC_PER_SEC 1000000000L
-#define BCH_MIN_NR_NBUCKETS (1 << 10)
-
/* minimum size filesystem we can create, given a bucket size: */
static u64 min_size(unsigned bucket_size)
{
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index c3efb435..9ff61deb 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -373,6 +373,11 @@ static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw)
}
}
+static inline u64 bucket_clock_freq(u64 capacity)
+{
+ return max(capacity >> 10, 2028ULL);
+}
+
static void bch2_inc_clock_hand(struct io_timer *timer)
{
struct bucket_clock *clock = container_of(timer,
@@ -411,7 +416,7 @@ static void bch2_inc_clock_hand(struct io_timer *timer)
* RW mode (that will be 0 when we're RO, yet we can still service
* reads)
*/
- timer->expire += capacity >> 10;
+ timer->expire += bucket_clock_freq(capacity);
bch2_io_timer_add(&c->io_clock[clock->rw], timer);
}
@@ -423,7 +428,7 @@ static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
clock->hand = 1;
clock->rw = rw;
clock->rescale.fn = bch2_inc_clock_hand;
- clock->rescale.expire = c->capacity >> 10;
+ clock->rescale.expire = bucket_clock_freq(c->capacity);
mutex_init(&clock->lock);
}
@@ -974,6 +979,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
{
struct bch_dev *ca;
u64 capacity = 0, reserved_sectors = 0, gc_reserve;
+ unsigned bucket_size_max = 0;
unsigned long ra_pages = 0;
unsigned i, j;
@@ -1009,14 +1015,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
for (j = 0; j < RESERVE_NONE; j++)
dev_reserve += ca->free[j].size;
- dev_reserve += ca->free_inc.size;
-
- dev_reserve += ARRAY_SIZE(c->write_points);
-
dev_reserve += 1; /* btree write point */
dev_reserve += 1; /* copygc write point */
dev_reserve += 1; /* rebalance write point */
- dev_reserve += WRITE_POINT_COUNT;
dev_reserve *= ca->mi.bucket_size;
@@ -1026,6 +1027,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
ca->mi.first_bucket);
reserved_sectors += dev_reserve * 2;
+
+ bucket_size_max = max_t(unsigned, bucket_size_max,
+ ca->mi.bucket_size);
}
gc_reserve = c->opts.gc_reserve_bytes
@@ -1038,6 +1042,8 @@ void bch2_recalc_capacity(struct bch_fs *c)
c->capacity = capacity - reserved_sectors;
+ c->bucket_size_max = bucket_size_max;
+
if (c->capacity) {
bch2_io_timer_add(&c->io_clock[READ],
&c->bucket_clock[READ].rescale);
@@ -1329,8 +1335,6 @@ not_enough:
* invalidated on disk:
*/
if (invalidating_data) {
- BUG();
- pr_info("holding writes");
pr_debug("invalidating existing data");
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
} else {
@@ -1390,40 +1394,12 @@ int bch2_fs_allocator_start(struct bch_fs *c)
return bch2_alloc_write(c);
}
-void bch2_fs_allocator_init(struct bch_fs *c)
+void bch2_fs_allocator_background_init(struct bch_fs *c)
{
- struct open_bucket *ob;
- struct write_point *wp;
-
- mutex_init(&c->write_points_hash_lock);
spin_lock_init(&c->freelist_lock);
bch2_bucket_clock_init(c, READ);
bch2_bucket_clock_init(c, WRITE);
- /* open bucket 0 is a sentinal NULL: */
- spin_lock_init(&c->open_buckets[0].lock);
-
- for (ob = c->open_buckets + 1;
- ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
- spin_lock_init(&ob->lock);
- c->open_buckets_nr_free++;
-
- ob->freelist = c->open_buckets_freelist;
- c->open_buckets_freelist = ob - c->open_buckets;
- }
-
- writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
- writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
-
- for (wp = c->write_points;
- wp < c->write_points + ARRAY_SIZE(c->write_points); wp++) {
- writepoint_init(wp, BCH_DATA_USER);
-
- wp->last_used = sched_clock();
- wp->write_point = (unsigned long) wp;
- hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
- }
-
c->pd_controllers_update_seconds = 5;
INIT_DELAYED_WORK(&c->pd_controllers_update, pd_controllers_update);
}
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index b5dbf7eb..ea07705b 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -5,7 +5,7 @@
#include "alloc_types.h"
#include "debug.h"
-#define ALLOC_SCAN_BATCH(ca) ((ca)->mi.nbuckets >> 9)
+#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
int bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
@@ -56,6 +56,6 @@ int bch2_dev_allocator_start(struct bch_dev *);
int bch2_alloc_write(struct bch_fs *);
int bch2_fs_allocator_start(struct bch_fs *);
-void bch2_fs_allocator_init(struct bch_fs *);
+void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index be94196e..06859960 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -491,7 +491,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
mutex_lock(&wp->lock);
open_bucket_for_each(c, &wp->ptrs, ob, i)
- if (ob->ptr.dev == ca->dev_idx)
+ if (!ca || ob->ptr.dev == ca->dev_idx)
open_bucket_free_unused(c, wp, ob);
else
ob_push(c, &ptrs, ob);
@@ -500,6 +500,15 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
mutex_unlock(&wp->lock);
}
+static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
+ unsigned long write_point)
+{
+ unsigned hash =
+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
+
+ return &c->write_points_hash[hash];
+}
+
static struct write_point *__writepoint_find(struct hlist_head *head,
unsigned long write_point)
{
@@ -512,6 +521,53 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
return NULL;
}
+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
+{
+ u64 stranded = c->write_points_nr * c->bucket_size_max;
+ u64 free = bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
+
+ return stranded * factor > free;
+}
+
+static bool try_increase_writepoints(struct bch_fs *c)
+{
+ struct write_point *wp;
+
+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) ||
+ too_many_writepoints(c, 32))
+ return false;
+
+ wp = c->write_points + c->write_points_nr++;
+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point));
+ return true;
+}
+
+static bool try_decrease_writepoints(struct bch_fs *c,
+ unsigned old_nr)
+{
+ struct write_point *wp;
+
+ mutex_lock(&c->write_points_hash_lock);
+ if (c->write_points_nr < old_nr) {
+ mutex_unlock(&c->write_points_hash_lock);
+ return true;
+ }
+
+ if (c->write_points_nr == 1 ||
+ !too_many_writepoints(c, 8)) {
+ mutex_unlock(&c->write_points_hash_lock);
+ return false;
+ }
+
+ wp = c->write_points + --c->write_points_nr;
+
+ hlist_del_rcu(&wp->node);
+ mutex_unlock(&c->write_points_hash_lock);
+
+ bch2_writepoint_stop(c, NULL, wp);
+ return true;
+}
+
static struct write_point *writepoint_find(struct bch_fs *c,
unsigned long write_point)
{
@@ -535,16 +591,22 @@ lock_wp:
mutex_unlock(&wp->lock);
goto restart_find;
}
-
+restart_find_oldest:
oldest = NULL;
for (wp = c->write_points;
- wp < c->write_points + ARRAY_SIZE(c->write_points);
- wp++)
+ wp < c->write_points + c->write_points_nr; wp++)
if (!oldest || time_before64(wp->last_used, oldest->last_used))
oldest = wp;
mutex_lock(&oldest->lock);
mutex_lock(&c->write_points_hash_lock);
+ if (oldest >= c->write_points + c->write_points_nr ||
+ try_increase_writepoints(c)) {
+ mutex_unlock(&c->write_points_hash_lock);
+ mutex_unlock(&oldest->lock);
+ goto restart_find_oldest;
+ }
+
wp = __writepoint_find(head, write_point);
if (wp && wp != oldest) {
mutex_unlock(&c->write_points_hash_lock);
@@ -580,10 +642,12 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
unsigned nr_effective = 0;
struct open_buckets ptrs = { .nr = 0 };
bool have_cache = false;
+ unsigned write_points_nr;
int ret = 0, i;
BUG_ON(!nr_replicas || !nr_replicas_required);
-
+retry:
+ write_points_nr = c->write_points_nr;
wp = writepoint_find(c, write_point.v);
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
@@ -636,6 +700,11 @@ err:
wp->ptrs = ptrs;
mutex_unlock(&wp->lock);
+
+ if (ret == -ENOSPC &&
+ try_decrease_writepoints(c, write_points_nr))
+ goto retry;
+
return ERR_PTR(ret);
}
@@ -687,3 +756,37 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
bch2_open_buckets_put(c, &ptrs);
}
+
+void bch2_fs_allocator_foreground_init(struct bch_fs *c)
+{
+ struct open_bucket *ob;
+ struct write_point *wp;
+
+ mutex_init(&c->write_points_hash_lock);
+ c->write_points_nr = ARRAY_SIZE(c->write_points);
+
+ /* open bucket 0 is a sentinal NULL: */
+ spin_lock_init(&c->open_buckets[0].lock);
+
+ for (ob = c->open_buckets + 1;
+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) {
+ spin_lock_init(&ob->lock);
+ c->open_buckets_nr_free++;
+
+ ob->freelist = c->open_buckets_freelist;
+ c->open_buckets_freelist = ob - c->open_buckets;
+ }
+
+ writepoint_init(&c->btree_write_point, BCH_DATA_BTREE);
+ writepoint_init(&c->rebalance_write_point, BCH_DATA_USER);
+
+ for (wp = c->write_points;
+ wp < c->write_points + c->write_points_nr; wp++) {
+ writepoint_init(wp, BCH_DATA_USER);
+
+ wp->last_used = sched_clock();
+ wp->write_point = (unsigned long) wp;
+ hlist_add_head_rcu(&wp->node,
+ writepoint_hash(c, wp->write_point));
+ }
+}
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index ae9844b5..729afc92 100644
--- a/libbcachefs/alloc_foreground.h
+++ b/libbcachefs/alloc_foreground.h
@@ -90,15 +90,6 @@ void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
struct write_point *);
-static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
- unsigned long write_point)
-{
- unsigned hash =
- hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
-
- return &c->write_points_hash[hash];
-}
-
static inline struct write_point_specifier writepoint_hashed(unsigned long v)
{
return (struct write_point_specifier) { .v = v | 1 };
@@ -116,4 +107,6 @@ static inline void writepoint_init(struct write_point *wp,
wp->type = type;
}
+void bch2_fs_allocator_foreground_init(struct bch_fs *);
+
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h
index 94c041d2..110663ff 100644
--- a/libbcachefs/alloc_types.h
+++ b/libbcachefs/alloc_types.h
@@ -45,7 +45,9 @@ typedef FIFO(long) alloc_fifo;
/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
#define OPEN_BUCKETS_COUNT 256
-#define WRITE_POINT_COUNT 32
+
+#define WRITE_POINT_HASH_NR 32
+#define WRITE_POINT_MAX 32
struct open_bucket {
spinlock_t lock;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 6d5c7d6b..e23f45e8 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -322,7 +322,7 @@ enum bch_time_stats {
#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))
/* Size of the freelist we allocate btree nodes from: */
-#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4)
+#define BTREE_NODE_RESERVE BTREE_RESERVE_MAX
struct btree;
@@ -598,6 +598,7 @@ struct bch_fs {
* and forces them to be revalidated
*/
u32 capacity_gen;
+ unsigned bucket_size_max;
atomic64_t sectors_available;
@@ -627,9 +628,10 @@ struct bch_fs {
struct write_point btree_write_point;
struct write_point rebalance_write_point;
- struct write_point write_points[WRITE_POINT_COUNT];
- struct hlist_head write_points_hash[WRITE_POINT_COUNT];
+ struct write_point write_points[WRITE_POINT_MAX];
+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR];
struct mutex write_points_hash_lock;
+ unsigned write_points_nr;
/* GARBAGE COLLECTION */
struct task_struct *gc_thread;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index cdf392b3..7ad080bf 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -904,6 +904,8 @@ struct bch_sb_field_journal {
/* BCH_SB_FIELD_members: */
+#define BCH_MIN_NR_NBUCKETS (1 << 6)
+
struct bch_member {
uuid_le uuid;
__le64 nbuckets; /* device size */
@@ -1381,7 +1383,7 @@ struct jset {
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
-#define BCH_JOURNAL_BUCKETS_MIN 20
+#define BCH_JOURNAL_BUCKETS_MIN 8
/* Btree: */
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 271c02f1..15a07e36 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -299,11 +299,6 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
return min(c->capacity, __bch2_fs_sectors_used(c, stats));
}
-static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
-{
- return c->capacity - bch2_fs_sectors_used(c, stats);
-}
-
static inline int is_unavailable_bucket(struct bucket_mark m)
{
return !is_available_bucket(m);
@@ -883,9 +878,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
ca->mi.bucket_size / c->opts.btree_node_size);
/* XXX: these should be tunable */
- size_t reserve_none = max_t(size_t, 4, nbuckets >> 9);
- size_t copygc_reserve = max_t(size_t, 16, nbuckets >> 7);
- size_t free_inc_nr = max(max_t(size_t, 16, nbuckets >> 12),
+ size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
+ size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
+ size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
btree_reserve);
bool resize = ca->buckets != NULL,
start_copygc = ca->copygc_thread != NULL;
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index d9fe938a..17b82cd0 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -174,6 +174,12 @@ void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
+static inline u64 bch2_fs_sectors_free(struct bch_fs *c,
+ struct bch_fs_usage stats)
+{
+ return c->capacity - bch2_fs_sectors_used(c, stats);
+}
+
static inline bool is_available_bucket(struct bucket_mark mark)
{
return (!mark.owned_by_allocator &&
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 902f39f6..f530f202 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -277,7 +277,7 @@ out:
return ret;
err:
fsck_err:
- BUG_ON(!ret);
+ pr_err("Error in recovery: %s (%i)", err, ret);
goto out;
}
@@ -380,6 +380,6 @@ int bch2_fs_initialize(struct bch_fs *c)
return 0;
err:
- BUG_ON(!ret);
+ pr_err("Error initializing new filesystem: %s (%i)", err, ret);
return ret;
}
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 54de9fac..8ef5db3d 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -808,7 +808,7 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb,
return "Too many buckets";
if (le64_to_cpu(m->nbuckets) -
- le16_to_cpu(m->first_bucket) < 1 << 10)
+ le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS)
return "Not enough buckets";
if (le16_to_cpu(m->bucket_size) <
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index be28d40f..b7a6f5fb 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -556,7 +556,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
- bch2_fs_allocator_init(c);
+ bch2_fs_allocator_background_init(c);
+ bch2_fs_allocator_foreground_init(c);
bch2_fs_rebalance_init(c);
bch2_fs_quota_init(c);