summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2020-02-19 17:02:04 -0800
committer: Darrick J. Wong <darrick.wong@oracle.com> 2020-06-01 21:16:35 -0700
commit: 11ccd1f3e71f45a0223b95f1c0640ef688cf96f9 (patch)
tree: 931ace461b2fe63b24a13660d45ca426a91fd1a7
parent: 72d96707db7e31a622fc27485bdb0a6ed5451ef0 (diff)
xfs: flush speculative space allocations when we run out of quota
If a fs modification (creation, file write, reflink, etc.) is unable to reserve enough quota to handle the modification, try clearing whatever space the filesystem might have been hanging onto (in the hopes of speeding up the filesystem) and then retry the reservation once.

The flushing behavior will become particularly important when we add deferred inode inactivation because that will increase the amount of space that isn't actively tied to user data.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- fs/xfs/xfs_bmap_util.c 16
-rw-r--r-- fs/xfs/xfs_file.c 2
-rw-r--r-- fs/xfs/xfs_icache.c 9
-rw-r--r-- fs/xfs/xfs_icache.h 2
-rw-r--r-- fs/xfs/xfs_inode.c 17
-rw-r--r-- fs/xfs/xfs_ioctl.c 2
-rw-r--r-- fs/xfs/xfs_iomap.c 19
-rw-r--r-- fs/xfs/xfs_reflink.c 40
-rw-r--r-- fs/xfs/xfs_trace.c 1
-rw-r--r-- fs/xfs/xfs_trace.h 40
10 files changed, 141 insertions, 7 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cc23a3e23e2d..5d4ad197d756 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -761,6 +761,7 @@ xfs_alloc_file_space(
*/
while (allocatesize_fsb && !error) {
xfs_fileoff_t s, e;
+ bool cleared_space = false;
/*
* Determine space reservations for data/realtime.
@@ -803,6 +804,7 @@ xfs_alloc_file_space(
/*
* Allocate and setup the transaction.
*/
+retry:
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
resrtextents, 0, &tp);
@@ -819,6 +821,20 @@ xfs_alloc_file_space(
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
0, quota_flag);
+ /*
+ * We weren't able to reserve enough quota to handle fallocate.
+ * Flush any disk space that was being held in the hopes of
+ * speeding up the filesystem. We hold the IOLOCK so we cannot
+ * do a synchronous scan.
+ */
+ if ((error == -ENOSPC || error == -EDQUOT) && !cleared_space) {
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ cleared_space = xfs_inode_free_quota_blocks(ip, false);
+ if (cleared_space)
+ goto retry;
+ return error;
+ }
if (error)
goto error1;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 626501aca5db..766b9c9acc63 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -650,7 +650,7 @@ write_retry:
*/
if (ret == -EDQUOT && !cleared_space) {
xfs_iunlock(ip, iolock);
- cleared_space = xfs_inode_free_quota_blocks(ip);
+ cleared_space = xfs_inode_free_quota_blocks(ip, true);
if (cleared_space)
goto write_retry;
iolock = 0;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 8993a4ccb879..b1e598543b6e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1558,7 +1558,8 @@ xfs_icache_free_eofblocks(
*/
bool
xfs_inode_free_quota_blocks(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ bool sync)
{
struct xfs_eofblocks eofb = {0};
struct xfs_dquot *dq;
@@ -1568,7 +1569,9 @@ xfs_inode_free_quota_blocks(
* Run a sync scan to increase effectiveness and use the union filter to
* cover all applicable quotas in a single scan.
*/
- eofb.eof_flags = XFS_EOF_FLAGS_UNION | XFS_EOF_FLAGS_SYNC;
+ eofb.eof_flags = XFS_EOF_FLAGS_UNION;
+ if (sync)
+ eofb.eof_flags |= XFS_EOF_FLAGS_SYNC;
if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
dq = xfs_inode_dquot(ip, XFS_DQ_USER);
@@ -1600,6 +1603,8 @@ xfs_inode_free_quota_blocks(
if (!do_work)
return false;
+ trace_xfs_inode_free_quota_blocks(ip->i_mount, &eofb, _RET_IP_);
+
xfs_icache_free_eofblocks(ip->i_mount, &eofb);
xfs_icache_free_cowblocks(ip->i_mount, &eofb);
return true;
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 41b3dc922820..ea77c67adacd 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -52,7 +52,7 @@ long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
-bool xfs_inode_free_quota_blocks(struct xfs_inode *ip);
+bool xfs_inode_free_quota_blocks(struct xfs_inode *ip, bool sync);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 15f36a4a7a45..a22cf9f6fe01 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1125,6 +1125,7 @@ xfs_create(
struct xfs_dquot *gdqp = NULL;
struct xfs_dquot *pdqp = NULL;
struct xfs_trans_res *tres;
+ bool cleared_space = false;
uint resblks;
trace_xfs_create(dp, name);
@@ -1157,6 +1158,7 @@ xfs_create(
* the case we'll drop the one we have and get a more
* appropriate transaction later.
*/
+retry:
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
/* flush outstanding delalloc blocks and retry */
@@ -1174,6 +1176,21 @@ xfs_create(
*/
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0);
+ /*
+ * We weren't able to reserve enough quota to handle adding the inode.
+ * Flush any disk space that was being held in the hopes of speeding up
+ * the filesystem.
+ */
+ if ((error == -EDQUOT || error == -ENOSPC) && !cleared_space) {
+ xfs_trans_cancel(tp);
+ if (unlock_dp_on_error)
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ unlock_dp_on_error = false;
+ cleared_space = xfs_inode_free_quota_blocks(dp, true);
+ if (cleared_space)
+ goto retry;
+ goto out_release_inode;
+ }
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6a3c675a8aeb..b305dba83f46 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -2398,6 +2398,8 @@ xfs_file_ioctl(
if (error)
return error;
+ trace_xfs_ioc_free_eofblocks(mp, &keofb, _RET_IP_);
+
sb_start_write(mp->m_super);
error = xfs_icache_free_eofblocks(mp, &keofb);
sb_end_write(mp->m_super);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 00ad00caab91..39b0fb7f7d6a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -28,6 +28,7 @@
#include "xfs_dquot.h"
#include "xfs_reflink.h"
#include "xfs_health.h"
+#include "xfs_icache.h"
#define XFS_ALLOC_ALIGN(mp, off) \
(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
@@ -202,6 +203,7 @@ xfs_iomap_write_direct(
int error;
int bmapi_flags = XFS_BMAPI_PREALLOC;
uint tflags = 0;
+ bool cleared_space = false;
ASSERT(count_fsb > 0);
@@ -241,6 +243,7 @@ xfs_iomap_write_direct(
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
}
}
+retry:
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents,
tflags, &tp);
if (error)
@@ -249,6 +252,22 @@ xfs_iomap_write_direct(
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
+ /*
+ * We weren't able to reserve enough quota for the direct write.
+ * Flush any disk space that was being held in the hopes of speeding up
+ * the filesystem. Historically, we expected callers to have
+ * preallocated all the space before a direct write, but this is not an
+ * absolute requirement. We still hold the IOLOCK so we cannot do a
+ * sync scan.
+ */
+ if ((error == -ENOSPC || error == -EDQUOT) && !cleared_space) {
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ cleared_space = xfs_inode_free_quota_blocks(ip, false);
+ if (cleared_space)
+ goto retry;
+ return error;
+ }
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index f50a8c2f21a5..33fdd6c67c32 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -351,13 +351,14 @@ xfs_reflink_allocate_cow(
bool convert_now)
{
struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
xfs_fileoff_t offset_fsb = imap->br_startoff;
xfs_filblks_t count_fsb = imap->br_blockcount;
- struct xfs_trans *tp;
- int nimaps, error = 0;
- bool found;
xfs_filblks_t resaligned;
xfs_extlen_t resblks = 0;
+ bool found;
+ bool cleared_space = false;
+ int nimaps, error = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (!ip->i_cowfp) {
@@ -376,6 +377,7 @@ xfs_reflink_allocate_cow(
resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
xfs_iunlock(ip, *lockmode);
+retry:
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
*lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, *lockmode);
@@ -400,6 +402,23 @@ xfs_reflink_allocate_cow(
error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
XFS_QMOPT_RES_REGBLKS);
+ /*
+ * We weren't able to reserve enough quota to handle copy on write.
+ * Flush any disk space that was being held in the hopes of speeding up
+ * the filesystem. We potentially hold the IOLOCK so we cannot do a
+ * synchronous scan.
+ */
+ if ((error == -ENOSPC || error == -EDQUOT) && !cleared_space) {
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, *lockmode);
+ *lockmode = 0;
+ cleared_space = xfs_inode_free_quota_blocks(ip, false);
+ if (cleared_space)
+ goto retry;
+ *lockmode = XFS_ILOCK_EXCL;
+ xfs_ilock(ip, *lockmode);
+ return error;
+ }
if (error)
goto out_trans_cancel;
@@ -1000,9 +1019,11 @@ xfs_reflink_remap_extent(
int64_t ip_delta = 0;
unsigned int resblks;
bool real_extent = xfs_bmap_is_real_extent(imap);
+ bool cleared_space = false;
int nimaps;
int error;
+retry:
/* Start a rolling transaction to switch the mappings */
resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
@@ -1061,6 +1082,19 @@ xfs_reflink_remap_extent(
/* Do we have enough quota? */
error = xfs_trans_reserve_quota_nblks(tp, ip,
imap->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
+ /*
+ * We weren't able to reserve enough quota for the remapping.
+ * Flush any disk space that was being held in the hopes of
+ * speeding up the filesystem. We still hold the IOLOCK so we
+ * cannot do a sync scan.
+ */
+ if ((error == -ENOSPC || error == -EDQUOT) && !cleared_space) {
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ cleared_space = xfs_inode_free_quota_blocks(ip, false);
+ if (cleared_space)
+ goto retry;
+ }
if (error)
goto out_cancel;
}
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 120398a37c2a..9b8d703dc9fd 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -29,6 +29,7 @@
#include "xfs_filestream.h"
#include "xfs_fsmap.h"
#include "xfs_btree_staging.h"
+#include "xfs_icache.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f56717e874e9..c9962df1bf55 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -36,6 +36,7 @@ struct xfs_owner_info;
struct xfs_trans_res;
struct xfs_inobt_rec_incore;
union xfs_btree_ptr;
+struct xfs_eofblocks;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -3704,6 +3705,45 @@ TRACE_EVENT(xfs_btree_bload_block,
__entry->nr_records)
)
+DECLARE_EVENT_CLASS(xfs_eofblocks_class,
+ TP_PROTO(struct xfs_mount *mp, struct xfs_eofblocks *eofb,
+ unsigned long caller_ip),
+ TP_ARGS(mp, eofb, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(__u32, flags)
+ __field(uint32_t, uid)
+ __field(uint32_t, gid)
+ __field(prid_t, prid)
+ __field(__u64, min_file_size)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->flags = eofb->eof_flags;
+ __entry->uid = from_kuid(mp->m_super->s_user_ns, eofb->eof_uid);
+ __entry->gid = from_kgid(mp->m_super->s_user_ns, eofb->eof_gid);
+ __entry->prid = eofb->eof_prid;
+ __entry->min_file_size = eofb->eof_min_file_size;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->flags,
+ __entry->uid,
+ __entry->gid,
+ __entry->prid,
+ __entry->min_file_size,
+ (char *)__entry->caller_ip)
+);
+#define DEFINE_EOFBLOCKS_EVENT(name) \
+DEFINE_EVENT(xfs_eofblocks_class, name, \
+ TP_PROTO(struct xfs_mount *mp, struct xfs_eofblocks *eofb, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, eofb, caller_ip))
+DEFINE_EOFBLOCKS_EVENT(xfs_ioc_free_eofblocks);
+DEFINE_EOFBLOCKS_EVENT(xfs_inode_free_quota_blocks);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH