diff options
Diffstat (limited to 'fs/xfs/xfs_swapext_item.c')
-rw-r--r-- | fs/xfs/xfs_swapext_item.c | 343 |
1 file changed, 340 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_swapext_item.c b/fs/xfs/xfs_swapext_item.c index d080b1e1665e..f51e83e33027 100644 --- a/fs/xfs/xfs_swapext_item.c +++ b/fs/xfs/xfs_swapext_item.c @@ -16,13 +16,16 @@ #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_swapext_item.h" +#include "xfs_swapext.h" #include "xfs_log.h" #include "xfs_bmap.h" #include "xfs_icache.h" +#include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_xchgrange.h" struct kmem_cache *xfs_sxi_cache; struct kmem_cache *xfs_sxd_cache; @@ -195,13 +198,334 @@ static const struct xfs_item_ops xfs_sxd_item_ops = { .iop_release = xfs_sxd_item_release, }; +static struct xfs_sxd_log_item * +xfs_trans_get_sxd( + struct xfs_trans *tp, + struct xfs_sxi_log_item *sxi_lip) +{ + struct xfs_sxd_log_item *sxd_lip; + + sxd_lip = kmem_cache_zalloc(xfs_sxd_cache, GFP_KERNEL | __GFP_NOFAIL); + xfs_log_item_init(tp->t_mountp, &sxd_lip->sxd_item, XFS_LI_SXD, + &xfs_sxd_item_ops); + sxd_lip->sxd_intent_log_item = sxi_lip; + sxd_lip->sxd_format.sxd_sxi_id = sxi_lip->sxi_format.sxi_id; + + xfs_trans_add_item(tp, &sxd_lip->sxd_item); + return sxd_lip; +} + +/* + * Finish an swapext update and log it to the SXD. Note that the transaction is + * marked dirty regardless of whether the swapext update succeeds or fails to + * support the SXI/SXD lifecycle rules. + */ +static int +xfs_swapext_finish_update( + struct xfs_trans *tp, + struct xfs_log_item *done, + struct xfs_swapext_intent *sxi) +{ + int error; + + error = xfs_swapext_finish_one(tp, sxi); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the SXI and frees the SXD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + if (done) + set_bit(XFS_LI_DIRTY, &done->li_flags); + + return error; +} + +/* Log swapext updates in the intent item. 
*/ +STATIC void +xfs_swapext_log_item( + struct xfs_trans *tp, + struct xfs_sxi_log_item *sxi_lip, + struct xfs_swapext_intent *sxi) +{ + struct xfs_swap_extent *sx; + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &sxi_lip->sxi_item.li_flags); + + sx = &sxi_lip->sxi_format.sxi_extent; + sx->sx_inode1 = sxi->sxi_ip1->i_ino; + sx->sx_inode2 = sxi->sxi_ip2->i_ino; + sx->sx_startoff1 = sxi->sxi_startoff1; + sx->sx_startoff2 = sxi->sxi_startoff2; + sx->sx_blockcount = sxi->sxi_blockcount; + sx->sx_isize1 = sxi->sxi_isize1; + sx->sx_isize2 = sxi->sxi_isize2; + sx->sx_flags = sxi->sxi_flags; +} + +STATIC struct xfs_log_item * +xfs_swapext_create_intent( + struct xfs_trans *tp, + struct list_head *items, + unsigned int count, + bool sort) +{ + struct xfs_sxi_log_item *sxi_lip = xfs_sxi_init(tp->t_mountp); + struct xfs_swapext_intent *sxi; + + ASSERT(count == XFS_SXI_MAX_FAST_EXTENTS); + + /* + * We use the same defer ops control machinery to perform extent swaps + * even if we lack the machinery to track the operation status through + * log items. + */ + if (!xfs_has_atomicswap(tp->t_mountp)) + return NULL; + + xfs_trans_add_item(tp, &sxi_lip->sxi_item); + list_for_each_entry(sxi, items, sxi_list) + xfs_swapext_log_item(tp, sxi_lip, sxi); + return &sxi_lip->sxi_item; +} + +STATIC struct xfs_log_item * +xfs_swapext_create_done( + struct xfs_trans *tp, + struct xfs_log_item *intent, + unsigned int count) +{ + if (intent == NULL) + return NULL; + return &xfs_trans_get_sxd(tp, SXI_ITEM(intent))->sxd_item; +} + +/* Process a deferred swapext update. */ +STATIC int +xfs_swapext_finish_item( + struct xfs_trans *tp, + struct xfs_log_item *done, + struct list_head *item, + struct xfs_btree_cur **state) +{ + struct xfs_swapext_intent *sxi; + int error; + + sxi = container_of(item, struct xfs_swapext_intent, sxi_list); + + /* + * Swap one more extent between the two files. 
If there's still more + * work to do, we want to requeue ourselves after all other pending + * deferred operations have finished. This includes all of the dfops + * that we queued directly as well as any new ones created in the + * process of finishing the others. Doing so prevents us from queuing + * a large number of SXI log items in kernel memory, which in turn + * prevents us from pinning the tail of the log (while logging those + * new SXI items) until the first SXI items can be processed. + */ + error = xfs_swapext_finish_update(tp, done, sxi); + if (error == -EAGAIN) + return error; + + kmem_free(sxi); + return error; +} + +/* Abort all pending SXIs. */ +STATIC void +xfs_swapext_abort_intent( + struct xfs_log_item *intent) +{ + xfs_sxi_release(SXI_ITEM(intent)); +} + +/* Cancel a deferred swapext update. */ +STATIC void +xfs_swapext_cancel_item( + struct list_head *item) +{ + struct xfs_swapext_intent *sxi; + + sxi = container_of(item, struct xfs_swapext_intent, sxi_list); + kmem_free(sxi); +} + +const struct xfs_defer_op_type xfs_swapext_defer_type = { + .max_items = XFS_SXI_MAX_FAST_EXTENTS, + .create_intent = xfs_swapext_create_intent, + .abort_intent = xfs_swapext_abort_intent, + .create_done = xfs_swapext_create_done, + .finish_item = xfs_swapext_finish_item, + .cancel_item = xfs_swapext_cancel_item, +}; + +/* Is this recovered SXI ok? 
*/ +static inline bool +xfs_sxi_validate( + struct xfs_mount *mp, + struct xfs_sxi_log_item *sxi_lip) +{ + struct xfs_swap_extent *sx = &sxi_lip->sxi_format.sxi_extent; + + if (!xfs_has_atomicswap(mp)) + return false; + + if (sxi_lip->sxi_format.__pad != 0) + return false; + + if (sx->sx_flags & ~XFS_SWAP_EXT_FLAGS) + return false; + + if (!xfs_verify_ino(mp, sx->sx_inode1) || + !xfs_verify_ino(mp, sx->sx_inode2)) + return false; + + if ((sx->sx_flags & XFS_SWAP_EXT_SET_SIZES) && + (sx->sx_isize1 < 0 || sx->sx_isize2 < 0)) + return false; + + if (!xfs_verify_fileext(mp, sx->sx_startoff1, sx->sx_blockcount)) + return false; + + return xfs_verify_fileext(mp, sx->sx_startoff2, sx->sx_blockcount); +} + +/* + * Use the recovered log state to create a new request, estimate resource + * requirements, and create a new incore intent state. + */ +STATIC struct xfs_swapext_intent * +xfs_sxi_item_recover_intent( + struct xfs_mount *mp, + const struct xfs_swap_extent *sx, + struct xfs_swapext_res *res) +{ + struct xfs_swapext_req req; + struct xfs_inode *ip1, *ip2; + int error; + + /* + * Grab both inodes and set IRECOVERY to prevent trimming of post-eof + * extents and freeing of unlinked inodes until we're totally done + * processing files. 
+ */ + error = xlog_recover_iget(mp, sx->sx_inode1, &ip1); + if (error) + return ERR_PTR(error); + error = xlog_recover_iget(mp, sx->sx_inode2, &ip2); + if (error) + goto err_rele1; + + req.ip1 = ip1; + req.ip2 = ip2; + req.startoff1 = sx->sx_startoff1; + req.startoff2 = sx->sx_startoff2; + req.blockcount = sx->sx_blockcount; + req.req_flags = 0; + + if (sx->sx_flags & XFS_SWAP_EXT_ATTR_FORK) + req.whichfork = XFS_ATTR_FORK; + else + req.whichfork = XFS_DATA_FORK; + + if (sx->sx_flags & XFS_SWAP_EXT_SET_SIZES) + req.req_flags |= XFS_SWAP_REQ_SET_SIZES; + if (sx->sx_flags & XFS_SWAP_EXT_SKIP_FILE1_HOLES) + req.req_flags |= XFS_SWAP_REQ_SKIP_FILE1_HOLES; + + xfs_xchg_range_ilock(NULL, ip1, ip2); + error = xfs_swapext_estimate(&req, res); + xfs_xchg_range_iunlock(ip1, ip2); + if (error) + goto err_rele2; + + return xfs_swapext_init_intent(&req); + +err_rele2: + xfs_irele(ip2); +err_rele1: + xfs_irele(ip1); + return ERR_PTR(error); +} + /* Process a swapext update intent item that was recovered from the log. 
*/ STATIC int xfs_sxi_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { - return -EFSCORRUPTED; + struct xfs_swapext_intent *sxi; + struct xfs_swapext_res res; + struct xfs_sxi_log_item *sxi_lip = SXI_ITEM(lip); + struct xfs_mount *mp = lip->li_mountp; + struct xfs_swap_extent *sx = &sxi_lip->sxi_format.sxi_extent; + struct xfs_sxd_log_item *sxd_lip = NULL; + struct xfs_trans *tp; + struct xfs_inode *ip1, *ip2; + int error = 0; + + if (!xfs_sxi_validate(mp, sxi_lip)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &sxi_lip->sxi_format, + sizeof(sxi_lip->sxi_format)); + return -EFSCORRUPTED; + } + + sxi = xfs_sxi_item_recover_intent(mp, sx, &res); + if (IS_ERR(sxi)) + return PTR_ERR(sxi); + + ip1 = sxi->sxi_ip1; + ip2 = sxi->sxi_ip2; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, res.resblks, 0, 0, + &tp); + if (error) + goto err_rele; + + sxd_lip = xfs_trans_get_sxd(tp, sxi_lip); + + xfs_xchg_range_ilock(tp, ip1, ip2); + + error = xfs_swapext_finish_update(tp, &sxd_lip->sxd_item, sxi); + if (error == -EAGAIN) { + /* + * If there's more extent swapping to be done, we have to + * schedule that as a separate deferred operation to be run + * after we've finished replaying all of the intents we + * recovered from the log. Transfer ownership of the sxi to + * the transaction. + */ + xfs_swapext_schedule(tp, sxi); + error = 0; + sxi = NULL; + } + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, sx, + sizeof(*sx)); + if (error) + goto err_cancel; + + /* + * Commit transaction, which frees the transaction and saves the inodes + * for later replay activities. 
+ */ + error = xfs_defer_ops_capture_and_commit(tp, capture_list); + goto err_unlock; + +err_cancel: + xfs_trans_cancel(tp); +err_unlock: + xfs_xchg_range_iunlock(ip1, ip2); +err_rele: + kmem_free(sxi); + xfs_irele(ip2); + xfs_irele(ip1); + return error; } STATIC bool @@ -218,8 +542,21 @@ xfs_sxi_item_relog( struct xfs_log_item *intent, struct xfs_trans *tp) { - ASSERT(0); - return NULL; + struct xfs_sxd_log_item *sxd_lip; + struct xfs_sxi_log_item *sxi_lip; + struct xfs_swap_extent *sx; + + sx = &SXI_ITEM(intent)->sxi_format.sxi_extent; + + tp->t_flags |= XFS_TRANS_DIRTY; + sxd_lip = xfs_trans_get_sxd(tp, SXI_ITEM(intent)); + set_bit(XFS_LI_DIRTY, &sxd_lip->sxd_item.li_flags); + + sxi_lip = xfs_sxi_init(tp->t_mountp); + memcpy(&sxi_lip->sxi_format.sxi_extent, sx, sizeof(*sx)); + xfs_trans_add_item(tp, &sxi_lip->sxi_item); + set_bit(XFS_LI_DIRTY, &sxi_lip->sxi_item.li_flags); + return &sxi_lip->sxi_item; } static const struct xfs_item_ops xfs_sxi_item_ops = { |