diff options
Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 567 |
1 files changed, 0 insertions, 567 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index a0ce5e5dd5e4..024cb13fb9f0 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1235,570 +1235,3 @@ out_trans_cancel: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } - -/* - * We need to check that the format of the data fork in the temporary inode is - * valid for the target inode before doing the swap. This is not a problem with - * attr1 because of the fixed fork offset, but attr2 has a dynamically sized - * data fork depending on the space the attribute fork is taking so we can get - * invalid formats on the target inode. - * - * E.g. target has space for 7 extents in extent format, temp inode only has - * space for 6. If we defragment down to 7 extents, then the tmp format is a - * btree, but when swapped it needs to be in extent format. Hence we can't just - * blindly swap data forks on attr2 filesystems. - * - * Note that we check the swap in both directions so that we don't end up with - * a corrupt temporary inode, either. - * - * Note that fixing the way xfs_fsr sets up the attribute fork in the source - * inode will prevent this situation from occurring, so all we do here is - * reject and log the attempt. basically we are putting the responsibility on - * userspace to get this right. - */ -int -xfs_swap_extents_check_format( - struct xfs_inode *ip, /* target inode */ - struct xfs_inode *tip) /* tmp inode */ -{ - - /* Should never get a local format */ - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || - tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) - return -EINVAL; - - /* - * if the target inode has less extents that then temporary inode then - * why did userspace call us? - */ - if (ip->i_d.di_nextents < tip->i_d.di_nextents) - return -EINVAL; - - /* - * If we have to use the (expensive) rmap swap method, we can - * handle any number of extents and any format. - */ - if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb)) - return 0; - - /* - * if the target inode is in extent form and the temp inode is in btree - * form then we will end up with the target inode in the wrong format - * as we already know there are less extents in the temp inode. - */ - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - tip->i_d.di_format == XFS_DINODE_FMT_BTREE) - return -EINVAL; - - /* Check temp in extent form to max in target */ - if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > - XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) - return -EINVAL; - - /* Check target in extent form to max in temp */ - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > - XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) - return -EINVAL; - - /* - * If we are in a btree format, check that the temp root block will fit - * in the target and that it has enough extents to be in btree format - * in the target. - * - * Note that we have to be careful to allow btree->extent conversions - * (a common defrag case) which will occur when the temp inode is in - * extent format... - */ - if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - if (XFS_IFORK_Q(ip) && - XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) - return -EINVAL; - if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= - XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) - return -EINVAL; - } - - /* Reciprocal target->temp btree format checks */ - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - if (XFS_IFORK_Q(tip) && - XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) - return -EINVAL; - if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= - XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) - return -EINVAL; - } - - return 0; -} - -static int -xfs_swap_extent_flush( - struct xfs_inode *ip) -{ - int error; - - error = filemap_write_and_wait(VFS_I(ip)->i_mapping); - if (error) - return error; - truncate_pagecache_range(VFS_I(ip), 0, -1); - - /* Verify O_DIRECT for ftmp */ - if (VFS_I(ip)->i_mapping->nrpages) - return -EINVAL; - return 0; -} - -/* - * Fix up the owners of the bmbt blocks to refer to the current inode. The - * change owner scan attempts to order all modified buffers in the current - * transaction. In the event of ordered buffer failure, the offending buffer is - * physically logged as a fallback and the scan returns -EAGAIN. We must roll - * the transaction in this case to replenish the fallback log reservation and - * restart the scan. This process repeats until the scan completes. - */ -static int -xfs_swap_change_owner( - struct xfs_trans **tpp, - struct xfs_inode *ip, - struct xfs_inode *tmpip) -{ - int error; - struct xfs_trans *tp = *tpp; - - do { - error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino, - NULL); - /* success or fatal error */ - if (error != -EAGAIN) - break; - - error = xfs_trans_roll(tpp); - if (error) - break; - tp = *tpp; - - /* - * Redirty both inodes so they can relog and keep the log tail - * moving forward. - */ - xfs_trans_ijoin(tp, ip, 0); - xfs_trans_ijoin(tp, tmpip, 0); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE); - } while (true); - - return error; -} - -/* Swap the extents of two files by swapping data forks. */ -int -xfs_swap_extent_forks( - struct xfs_trans **tpp, - struct xfs_swapext_req *req) -{ - struct xfs_inode *ip = req->ip1; - struct xfs_inode *tip = req->ip2; - xfs_filblks_t aforkblks = 0; - xfs_filblks_t taforkblks = 0; - int64_t temp_blks; - xfs_extnum_t junk; - uint64_t tmp; - unsigned int reflink_state; - int src_log_flags = XFS_ILOG_CORE; - int target_log_flags = XFS_ILOG_CORE; - int error; - - reflink_state = xfs_swapext_reflink_prep(req); - - /* - * Count the number of extended attribute blocks - */ - if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && - (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(*tpp, ip, XFS_ATTR_FORK, &junk, - &aforkblks); - if (error) - return error; - } - if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && - (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(*tpp, tip, XFS_ATTR_FORK, &junk, - &taforkblks); - if (error) - return error; - } - - /* - * Btree format (v3) inodes have the inode number stamped in the bmbt - * block headers. We can't start changing the bmbt blocks until the - * inode owner change is logged so recovery does the right thing in the - * event of a crash. Set the owner change log flags now and leave the - * bmbt scan as the last step. - */ - if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) - target_log_flags |= XFS_ILOG_DOWNER; - if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) - src_log_flags |= XFS_ILOG_DOWNER; - } - - /* - * Swap the data forks of the inodes - */ - swap(ip->i_df, tip->i_df); - - /* Update quota accounting. */ - temp_blks = tip->i_d.di_nblocks - taforkblks + aforkblks; - xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - temp_blks - ip->i_d.di_nblocks); - - temp_blks = ip->i_d.di_nblocks + taforkblks - aforkblks; - xfs_trans_mod_dquot_byino(*tpp, tip, XFS_TRANS_DQ_BCOUNT, - temp_blks - tip->i_d.di_nblocks); - - /* - * Fix the on-disk inode values - */ - tmp = (uint64_t)ip->i_d.di_nblocks; - ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; - tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; - - swap(ip->i_d.di_nextents, tip->i_d.di_nextents); - swap(ip->i_d.di_format, tip->i_d.di_format); - - /* - * The extents in the source inode could still contain speculative - * preallocation beyond EOF (e.g. the file is open but not modified - * while defrag is in progress). In that case, we need to copy over the - * number of delalloc blocks the data fork in the source inode is - * tracking beyond EOF so that when the fork is truncated away when the - * temporary inode is unlinked we don't underrun the i_delayed_blks - * counter on that inode. - */ - ASSERT(tip->i_delayed_blks == 0); - tip->i_delayed_blks = ip->i_delayed_blks; - ip->i_delayed_blks = 0; - - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_EXTENTS: - src_log_flags |= XFS_ILOG_DEXT; - break; - case XFS_DINODE_FMT_BTREE: - ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) || - (src_log_flags & XFS_ILOG_DOWNER)); - src_log_flags |= XFS_ILOG_DBROOT; - break; - } - - switch (tip->i_d.di_format) { - case XFS_DINODE_FMT_EXTENTS: - target_log_flags |= XFS_ILOG_DEXT; - break; - case XFS_DINODE_FMT_BTREE: - target_log_flags |= XFS_ILOG_DBROOT; - ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) || - (target_log_flags & XFS_ILOG_DOWNER)); - break; - } - - xfs_swapext_reflink_finish(*tpp, req, reflink_state); - - xfs_trans_log_inode(*tpp, ip, src_log_flags); - xfs_trans_log_inode(*tpp, tip, target_log_flags); - - /* - * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems - * have inode number owner values in the bmbt blocks that still refer to - * the old inode. Scan each bmbt to fix up the owner values with the - * inode number of the current inode. - */ - if (src_log_flags & XFS_ILOG_DOWNER) { - error = xfs_swap_change_owner(tpp, ip, tip); - if (error) - return error; - } - if (target_log_flags & XFS_ILOG_DOWNER) { - error = xfs_swap_change_owner(tpp, tip, ip); - if (error) - return error; - } - - return 0; -} - -/* - * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip - * this if quota enforcement is disabled or if both inodes' dquots are the - * same. - */ -STATIC int -xfs_swap_extents_prep_quota( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_inode *tip) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_filblks_t aforkblks = 0; - xfs_filblks_t taforkblks = 0; - xfs_filblks_t ip_mapped, tip_mapped; - xfs_extnum_t junk; - int error; - - /* - * Don't bother with a quota reservation if we're not enforcing them - * or the two inodes have the same dquots. - */ - if (!(mp->m_qflags & XFS_ALL_QUOTA_ENFD) || - (ip->i_udquot == tip->i_udquot && - ip->i_gdquot == tip->i_gdquot && - ip->i_pdquot == tip->i_pdquot)) - return 0; - - /* - * Count the number of extended attribute blocks - */ - if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && - (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk, - &aforkblks); - if (error) - return error; - } - if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && - (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { - error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk, - &taforkblks); - if (error) - return error; - } - - /* Figure out how many blocks we'll move out of each file. */ - ip_mapped = ip->i_d.di_nblocks - aforkblks; - tip_mapped = tip->i_d.di_nblocks - taforkblks; - - /* - * For each file, compute the net gain in the number of blocks that - * will be mapped into that file and reserve that much quota. The - * quota counts must be able to absorb at least that much space. - */ - if (tip_mapped > ip_mapped) { - error = xfs_trans_reserve_quota_nblks(tp, ip, - tip_mapped - ip_mapped, 0, - XFS_QMOPT_RES_REGBLKS); - if (error) - return error; - } - - if (ip_mapped > tip_mapped) { - error = xfs_trans_reserve_quota_nblks(tp, tip, - ip_mapped - tip_mapped, 0, - XFS_QMOPT_RES_REGBLKS); - if (error) - return error; - } - - /* - * For each file, forcibly reserve the gross gain in mapped blocks so - * that we don't trip over any quota block reservation assertions. - * We must reserve the gross gain because the quota code subtracts from - * bcount the number of blocks that we unmap; it does not add that - * quantity back to the quota block reservation. - */ - error = xfs_trans_reserve_quota_nblks(tp, ip, ip_mapped, 0, - XFS_QMOPT_FORCE_RES | XFS_QMOPT_RES_REGBLKS); - if (error) - return error; - - return xfs_trans_reserve_quota_nblks(tp, tip, tip_mapped, 0, - XFS_QMOPT_FORCE_RES | XFS_QMOPT_RES_REGBLKS); -} - -int -xfs_swap_extents( - struct xfs_inode *ip, /* target inode */ - struct xfs_inode *tip, /* tmp inode */ - struct xfs_swapext *sxp) -{ - struct xfs_swapext_req req = { - .ip1 = ip, - .ip2 = tip, - .whichfork = XFS_DATA_FORK, - }; - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - struct xfs_bstat *sbp = &sxp->sx_stat; - int error = 0; - int lock_flags; - int resblks = 0; - - /* - * Lock the inodes against other IO, page faults and truncate to - * begin with. Then we can ensure the inodes are flushed and have no - * page cache safely. Once we have done this we can take the ilocks and - * do the rest of the checks. - */ - lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); - lock_flags = XFS_MMAPLOCK_EXCL; - xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); - - /* Verify that both files have the same format */ - if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { - error = -EINVAL; - goto out_unlock; - } - - /* Verify both files are either real-time or non-realtime */ - if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { - error = -EINVAL; - goto out_unlock; - } - - error = xfs_qm_dqattach(ip); - if (error) - goto out_unlock; - - error = xfs_qm_dqattach(tip); - if (error) - goto out_unlock; - - error = xfs_swap_extent_flush(ip); - if (error) - goto out_unlock; - error = xfs_swap_extent_flush(tip); - if (error) - goto out_unlock; - - if (xfs_inode_has_cow_data(tip)) { - error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true); - if (error) - goto out_unlock; - } - - /* - * Extent "swapping" with rmap requires a permanent reservation and - * a block reservation because it's really just a remap operation - * performed with log redo items! - */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { - int w = XFS_DATA_FORK; - uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w); - uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w); - - /* - * Conceptually this shouldn't affect the shape of either bmbt, - * but since we atomically move extents one by one, we reserve - * enough space to rebuild both trees. - */ - resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w); - resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); - - /* - * Handle the corner case where either inode might straddle the - * btree format boundary. If so, the inode could bounce between - * btree <-> extent format on unmap -> remap cycles, freeing and - * allocating a bmapbt block each time. - */ - if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1)) - resblks += XFS_IFORK_MAXEXT(ip, w); - if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1)) - resblks += XFS_IFORK_MAXEXT(tip, w); - } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); - if (error) - goto out_unlock; - - /* - * Lock and join the inodes to the tansaction so that transaction commit - * or cancel will unlock the inodes from this point onwards. - */ - xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); - lock_flags |= XFS_ILOCK_EXCL; - xfs_trans_ijoin(tp, ip, 0); - xfs_trans_ijoin(tp, tip, 0); - - - /* Verify all data are being swapped */ - if (sxp->sx_offset != 0 || - sxp->sx_length != ip->i_d.di_size || - sxp->sx_length != tip->i_d.di_size) { - error = -EFAULT; - goto out_trans_cancel; - } - - trace_xfs_swap_extent_before(ip, 0); - trace_xfs_swap_extent_before(tip, 1); - - /* check inode formats now that data is flushed */ - error = xfs_swap_extents_check_format(ip, tip); - if (error) { - xfs_notice(mp, - "%s: inode 0x%llx format is incompatible for exchanging.", - __func__, ip->i_ino); - goto out_trans_cancel; - } - - /* - * Compare the current change & modify times with that - * passed in. If they differ, we abort this swap. - * This is the mechanism used to ensure the calling - * process that the file was not changed out from - * under it. - */ - if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || - (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || - (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || - (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { - error = -EBUSY; - goto out_trans_cancel; - } - - /* - * Reserve ourselves some quota if any of them are in enforcing mode. - * In theory we only need enough to satisfy the change in the number - * of blocks between the two ranges being remapped. - */ - error = xfs_swap_extents_prep_quota(tp, ip, tip); - if (error) - goto out_trans_cancel; - - /* - * Note the trickiness in setting the log flags - we set the owner log - * flag on the opposite inode (i.e. the inode we are setting the new - * owner to be) because once we swap the forks and log that, log - * recovery is going to see the fork as owned by the swapped inode, - * not the pre-swapped inodes. - */ - req.blockcount = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip))); - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) - error = xfs_swapext_deferred_bmap(&tp, &req); - else - error = xfs_swap_extent_forks(&tp, &req); - if (error) { - trace_xfs_swap_extent_error(ip, error, _THIS_IP_); - goto out_trans_cancel; - } - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - - error = xfs_trans_commit(tp); - - trace_xfs_swap_extent_after(ip, 0); - trace_xfs_swap_extent_after(tip, 1); - -out_unlock: - xfs_iunlock(ip, lock_flags); - xfs_iunlock(tip, lock_flags); - unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); - return error; - -out_trans_cancel: - xfs_trans_cancel(tp); - goto out_unlock; -} |