diff options
Diffstat (limited to 'fs/xfs/libxfs')
37 files changed, 1023 insertions, 663 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e5ebc3770460..33db69be4832 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -39,6 +39,7 @@ #include "xfs_rmap_btree.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" +#include "xfs_ialloc_btree.h" /* * Per-AG Block Reservations @@ -200,22 +201,30 @@ __xfs_ag_resv_init( struct xfs_mount *mp = pag->pag_mount; struct xfs_ag_resv *resv; int error; + xfs_extlen_t reserved; - resv = xfs_perag_resv(pag, type); if (used > ask) ask = used; - resv->ar_asked = ask; - resv->ar_reserved = resv->ar_orig_reserved = ask - used; - mp->m_ag_max_usable -= ask; + reserved = ask - used; - trace_xfs_ag_resv_init(pag, type, ask); - - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); - if (error) + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); + if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); + xfs_warn(mp, +"Per-AG reservation for AG %u failed. Filesystem may run out of space.", + pag->pag_agno); + return error; + } - return error; + mp->m_ag_max_usable -= ask; + + resv = xfs_perag_resv(pag, type); + resv->ar_asked = ask; + resv->ar_reserved = resv->ar_orig_reserved = reserved; + + trace_xfs_ag_resv_init(pag, type, ask); + return 0; } /* Create a per-AG block reservation. */ @@ -223,6 +232,8 @@ int xfs_ag_resv_init( struct xfs_perag *pag) { + struct xfs_mount *mp = pag->pag_mount; + xfs_agnumber_t agno = pag->pag_agno; xfs_extlen_t ask; xfs_extlen_t used; int error = 0; @@ -231,23 +242,45 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; - error = xfs_refcountbt_calc_reserves(pag->pag_mount, - pag->pag_agno, &ask, &used); + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, - ask, used); + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) { + /* + * Because we didn't have per-AG reservations when the + * finobt feature was added we might not be able to + * reserve all needed blocks. Warn and fall back to the + * old and potentially buggy code in that case, but + * ensure we do have the reservation for the refcountbt. + */ + ask = used = 0; + + mp->m_inotbt_nores = true; + + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, + &used); + if (error) + goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) + goto out; + } } /* Create the AGFL metadata reservation */ if (pag->pag_agfl_resv.ar_asked == 0) { ask = used = 0; - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, - &ask, &used); + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; @@ -256,6 +289,16 @@ xfs_ag_resv_init( goto out; } +#ifdef DEBUG + /* need to read in the AGF for the ASSERT below to work */ + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); + if (error) + return error; + + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + pag->pagf_freeblks + pag->pagf_flcount); +#endif out: return error; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index effb64cf714f..369adcc18c02 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -95,10 +95,7 @@ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { - unsigned int blocks; - - blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); - return blocks; + return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); } /* @@ -224,20 +221,22 @@ xfs_alloc_get_rec( * Compute aligned version of the found extent. * Takes alignment and min length into account. */ -STATIC void +STATIC bool xfs_alloc_compute_aligned( xfs_alloc_arg_t *args, /* allocation argument structure */ xfs_agblock_t foundbno, /* starting block in found extent */ xfs_extlen_t foundlen, /* length in found extent */ xfs_agblock_t *resbno, /* result block number */ - xfs_extlen_t *reslen) /* result length */ + xfs_extlen_t *reslen, /* result length */ + unsigned *busy_gen) { - xfs_agblock_t bno; - xfs_extlen_t len; + xfs_agblock_t bno = foundbno; + xfs_extlen_t len = foundlen; xfs_extlen_t diff; + bool busy; /* Trim busy sections out of found extent */ - xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); + busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); /* * If we have a largish extent that happens to start before min_agbno, @@ -262,6 +261,8 @@ xfs_alloc_compute_aligned( *resbno = bno; *reslen = len; } + + return busy; } /* @@ -365,36 +366,12 @@ xfs_alloc_fix_len( return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); ASSERT(rlen % args->prod == args->mod); + ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= + rlen + args->minleft); args->len = rlen; } /* - * Fix up length if there is too little space left in the a.g. - * Return 1 if ok, 0 if too little, should give up. - */ -STATIC int -xfs_alloc_fix_minleft( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agf_t *agf; /* a.g. freelist header */ - int diff; /* free space difference */ - - if (args->minleft == 0) - return 1; - agf = XFS_BUF_TO_AGF(args->agbp); - diff = be32_to_cpu(agf->agf_freeblks) - - args->len - args->minleft; - if (diff >= 0) - return 1; - args->len += diff; /* shrink the allocated space */ - /* casts to (int) catch length underflows */ - if ((int)args->len >= (int)args->minlen) - return 1; - args->agbno = NULLAGBLOCK; - return 0; -} - -/* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the * actual (current) free extent fbno for flen blocks. @@ -689,8 +666,6 @@ xfs_alloc_ag_vextent( xfs_alloc_arg_t *args) /* argument structure for allocation */ { int error=0; - xfs_extlen_t reservation; - xfs_extlen_t oldmax; ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); @@ -699,20 +674,6 @@ xfs_alloc_ag_vextent( ASSERT(args->alignment > 0); /* - * Clamp maxlen to the amount of free space minus any reservations - * that have been made. - */ - oldmax = args->maxlen; - reservation = xfs_ag_resv_needed(args->pag, args->resv); - if (args->maxlen > args->pag->pagf_freeblks - reservation) - args->maxlen = args->pag->pagf_freeblks - reservation; - if (args->maxlen == 0) { - args->agbno = NULLAGBLOCK; - args->maxlen = oldmax; - return 0; - } - - /* * Branch to correct routine based on the type. */ args->wasfromfl = 0; @@ -731,8 +692,6 @@ xfs_alloc_ag_vextent( /* NOTREACHED */ } - args->maxlen = oldmax; - if (error || args->agbno == NULLAGBLOCK) return error; @@ -782,10 +741,11 @@ xfs_alloc_ag_vextent_exact( int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ - xfs_agblock_t tbno; /* start block of trimmed extent */ - xfs_extlen_t tlen; /* length of trimmed extent */ - xfs_agblock_t tend; /* end block of trimmed extent */ + xfs_agblock_t tbno; /* start block of busy extent */ + xfs_extlen_t tlen; /* length of busy extent */ + xfs_agblock_t tend; /* end block of busy extent */ int i; /* success/failure of operation */ + unsigned busy_gen; ASSERT(args->alignment == 1); @@ -818,7 +778,9 @@ xfs_alloc_ag_vextent_exact( /* * Check for overlapping busy extents. */ - xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); + tbno = fbno; + tlen = flen; + xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); /* * Give up if the start of the extent is busy, or the freespace isn't @@ -841,9 +803,6 @@ xfs_alloc_ag_vextent_exact( args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - ASSERT(args->agbno + args->len <= tend); /* @@ -901,6 +860,7 @@ xfs_alloc_find_best_extent( xfs_agblock_t sdiff; int error; int i; + unsigned busy_gen; /* The good extent is perfect, no need to search. */ if (!gdiff) @@ -914,7 +874,8 @@ xfs_alloc_find_best_extent( if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); + xfs_alloc_compute_aligned(args, *sbno, *slen, + sbnoa, slena, &busy_gen); /* * The good extent is closer than this one. @@ -1003,7 +964,8 @@ xfs_alloc_ag_vextent_near( xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ - int forced = 0; + bool busy; + unsigned busy_gen; #ifdef DEBUG /* * Randomly don't execute the first algorithm. @@ -1030,6 +992,7 @@ restart: ltlen = 0; gtlena = 0; ltlena = 0; + busy = false; /* * Get a cursor for the by-size btree. @@ -1112,8 +1075,8 @@ restart: if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena); + busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena, &busy_gen); if (ltlena < args->minlen) continue; if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) @@ -1149,12 +1112,7 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_nominleft(args); - return 0; - } - blen = args->len; + /* * We are allocating starting at bnew for blen blocks. */ @@ -1236,8 +1194,8 @@ restart: if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, ltbno, ltlen, - <bnoa, <lena); + busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena, &busy_gen); if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) break; if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) @@ -1252,8 +1210,8 @@ restart: if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, gtbno, gtlen, - >bnoa, >lena); + busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, + >bnoa, >lena, &busy_gen); if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) break; if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) @@ -1314,9 +1272,9 @@ restart: if (bno_cur_lt == NULL && bno_cur_gt == NULL) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - if (!forced++) { + if (busy) { trace_xfs_alloc_near_busy(args); - xfs_log_force(args->mp, XFS_LOG_SYNC); + xfs_extent_busy_flush(args->mp, args->pag, busy_gen); goto restart; } trace_xfs_alloc_size_neither(args); @@ -1346,12 +1304,6 @@ restart: */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - trace_xfs_alloc_near_nominleft(args); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - return 0; - } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, args->datatype, ltbnoa, ltlena, <new); @@ -1403,7 +1355,8 @@ xfs_alloc_ag_vextent_size( int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ - int forced = 0; + bool busy; + unsigned busy_gen; restart: /* @@ -1412,6 +1365,7 @@ restart: cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); bno_cur = NULL; + busy = false; /* * Look for an entry >= maxlen+alignment-1 blocks. @@ -1421,14 +1375,13 @@ restart: goto error0; /* - * If none or we have busy extents that we cannot allocate from, then - * we have to settle for a smaller extent. In the case that there are - * no large extents, this will return the last entry in the tree unless - * the tree is empty. In the case that there are only busy large - * extents, this will return the largest small extent unless there + * If none then we have to settle for a smaller extent. In the case that + * there are no large extents, this will return the last entry in the + * tree unless the tree is empty. In the case that there are only busy + * large extents, this will return the largest small extent unless there * are no smaller extents available. */ - if (!i || forced > 1) { + if (!i) { error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, &flen, &i); if (error) @@ -1439,13 +1392,11 @@ restart: return 0; } ASSERT(i == 1); - xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, + &rlen, &busy_gen); } else { /* * Search for a non-busy extent that is large enough. - * If we are at low space, don't check, or if we fall of - * the end of the btree, turn off the busy check and - * restart. */ for (;;) { error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); @@ -1453,8 +1404,8 @@ restart: goto error0; XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - xfs_alloc_compute_aligned(args, fbno, flen, - &rbno, &rlen); + busy = xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen, &busy_gen); if (rlen >= args->maxlen) break; @@ -1466,18 +1417,13 @@ restart: /* * Our only valid extents must have been busy. * Make it unbusy by forcing the log out and - * retrying. If we've been here before, forcing - * the log isn't making the extents available, - * which means they have probably been freed in - * this transaction. In that case, we have to - * give up on them and we'll attempt a minlen - * allocation the next time around. + * retrying. */ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_busy(args); - if (!forced++) - xfs_log_force(args->mp, XFS_LOG_SYNC); + xfs_extent_busy_flush(args->mp, + args->pag, busy_gen); goto restart; } } @@ -1513,8 +1459,8 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); if (flen < bestrlen) break; - xfs_alloc_compute_aligned(args, fbno, flen, - &rbno, &rlen); + busy = xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen, &busy_gen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), @@ -1543,18 +1489,16 @@ restart: */ args->len = rlen; if (rlen < args->minlen) { - if (!forced++) { + if (busy) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_busy(args); - xfs_log_force(args->mp, XFS_LOG_SYNC); + xfs_extent_busy_flush(args->mp, args->pag, busy_gen); goto restart; } goto out_nominleft; } xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); /* @@ -2056,7 +2000,7 @@ xfs_alloc_space_available( int flags) { struct xfs_perag *pag = args->pag; - xfs_extlen_t longest; + xfs_extlen_t alloc_len, longest; xfs_extlen_t reservation; /* blocks that are still reserved */ int available; @@ -2066,17 +2010,28 @@ xfs_alloc_space_available( reservation = xfs_ag_resv_needed(pag, args->resv); /* do we have enough contiguous free space for the allocation? */ + alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, reservation); - if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) + if (longest < alloc_len) return false; /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - - reservation - min_free - args->total); - if (available < (int)args->minleft || available <= 0) + reservation - min_free - args->minleft); + if (available < (int)max(args->total, alloc_len)) return false; + /* + * Clamp maxlen to the amount of free space available for the actual + * extent allocation. + */ + if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { + args->maxlen = available; + ASSERT(args->maxlen > 0); + ASSERT(args->maxlen >= args->minlen); + } + return true; } @@ -2122,7 +2077,8 @@ xfs_alloc_fix_freelist( } need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags)) + if (!xfs_alloc_space_available(args, need, flags | + XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; /* @@ -2455,12 +2411,15 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) return false; - if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) return false; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS) + (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return false; /* @@ -2477,7 +2436,8 @@ xfs_agf_verify( return false; if (xfs_sb_version_hasreflink(&mp->m_sb) && - be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS) + (be32_to_cpu(agf->agf_refcount_level) < 1 || + be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) return false; return true;; @@ -2634,12 +2594,10 @@ xfs_alloc_vextent( xfs_agblock_t agsize; /* allocation group size */ int error; int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_extlen_t minleft;/* minimum left value, temp copy */ xfs_mount_t *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ int bump_rotor = 0; - int no_min = 0; xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ mp = args->mp; @@ -2668,7 +2626,6 @@ xfs_alloc_vextent( trace_xfs_alloc_vextent_badargs(args); return 0; } - minleft = args->minleft; switch (type) { case XFS_ALLOCTYPE_THIS_AG: @@ -2679,9 +2636,7 @@ xfs_alloc_vextent( */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->pag = xfs_perag_get(mp, args->agno); - args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2709,21 +2664,11 @@ xfs_alloc_vextent( args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); args->type = XFS_ALLOCTYPE_NEAR_BNO; /* FALLTHROUGH */ - case XFS_ALLOCTYPE_ANY_AG: - case XFS_ALLOCTYPE_START_AG: case XFS_ALLOCTYPE_FIRST_AG: /* * Rotate through the allocation groups looking for a winner. */ - if (type == XFS_ALLOCTYPE_ANY_AG) { - /* - * Start with the last place we left off. - */ - args->agno = sagno = (mp->m_agfrotor / rotorstep) % - mp->m_sb.sb_agcount; - args->type = XFS_ALLOCTYPE_THIS_AG; - flags = XFS_ALLOC_FLAG_TRYLOCK; - } else if (type == XFS_ALLOCTYPE_FIRST_AG) { + if (type == XFS_ALLOCTYPE_FIRST_AG) { /* * Start with allocation group given by bno. */ @@ -2732,8 +2677,6 @@ xfs_alloc_vextent( sagno = 0; flags = 0; } else { - if (type == XFS_ALLOCTYPE_START_AG) - args->type = XFS_ALLOCTYPE_THIS_AG; /* * Start with the given allocation group. */ @@ -2746,9 +2689,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2788,25 +2729,22 @@ xfs_alloc_vextent( * or switch to non-trylock mode. */ if (args->agno == sagno) { - if (no_min == 1) { + if (flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - if (flags == 0) { - no_min = 1; - } else { - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } + + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; } } xfs_perag_put(args->pag); } - if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { + if (bump_rotor) { if (args->agno == sagno) mp->m_agfrotor = (mp->m_agfrotor + 1) % (mp->m_sb.sb_agcount * rotorstep); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7c404a6b0ae3..2a8d0fa6fbbe 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -29,9 +29,7 @@ extern struct workqueue_struct *xfs_alloc_wq; /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ -#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */ #define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */ -#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */ #define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */ #define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */ #define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */ @@ -41,9 +39,7 @@ extern struct workqueue_struct *xfs_alloc_wq; typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_TYPES \ - { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \ { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \ - { XFS_ALLOCTYPE_START_AG, "START_AG" }, \ { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \ { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \ { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \ @@ -56,7 +52,7 @@ typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ - +#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ /* * Argument structure for xfs_alloc routines. diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 5ba2dac5e67c..efb467b10a71 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -421,13 +421,17 @@ xfs_allocbt_init_cursor( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_allocbt_ops; + if (btnum == XFS_BTNUM_BNO) + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); + else + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); if (btnum == XFS_BTNUM_CNT) { cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -131,9 +131,6 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (!xfs_inode_hasattr(ip)) - return -ENOATTR; - error = xfs_attr_args_init(&args, ip, name, flags); if (error) return error; @@ -392,9 +389,6 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - if (!xfs_inode_hasattr(dp)) - return -ENOATTR; - error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 8ea91f363093..2852521fc8ec 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -253,6 +253,7 @@ xfs_attr3_leaf_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; struct xfs_attr3_icleaf_hdr ichdr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -273,7 +274,12 @@ xfs_attr3_leaf_verify( if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) return false; } - if (ichdr.count == 0) + /* + * In recovery there is a transient state where count == 0 is valid + * because we may have transitioned an empty shortform attr to a leaf + * if the attr didn't fit in shortform. + */ + if (pag && pag->pagf_init && ichdr.count == 0) return false; /* XXX: need to range check rest of attr header values */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 4f2aed04f827..f7dda0c237b0 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -51,7 +51,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); -int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); +int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* @@ -77,7 +77,7 @@ int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); -int xfs_attr3_leaf_list_int(struct xfs_buf *bp, +void xfs_attr3_leaf_list_int(struct xfs_buf *bp, struct xfs_attr_list_context *context); /* diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c6eb21940783..9bd104f32908 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,6 +49,8 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" +#include "xfs_rmap_btree.h" +#include "xfs_icache.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -190,8 +192,12 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ + xfs_filblks_t orig_len; mp = ip->i_mount; + + /* Calculate the worst-case size of the bmbt. */ + orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -199,12 +205,20 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) { + rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; + break; + } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } + + /* Calculate the worst-case size of the rmapbt. */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + + mp->m_rmap_maxlevels; + return rval; } @@ -504,7 +518,7 @@ void xfs_bmap_trace_exlist( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork, /* data or attr fork */ + int whichfork, /* data or attr or cow fork */ unsigned long caller_ip) { xfs_extnum_t idx; /* extent record index */ @@ -513,11 +527,13 @@ xfs_bmap_trace_exlist( if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; + else if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(cnt == xfs_iext_count(ifp)); for (idx = 0; idx < cnt; idx++) - trace_xfs_extlist(ip, idx, whichfork, caller_ip); + trace_xfs_extlist(ip, idx, state, caller_ip); } /* @@ -724,15 +740,9 @@ xfs_bmap_extents_to_btree( * Fill in the root. */ block = ifp->if_broot; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 1, 1, ip->i_ino, + xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, + XFS_BTNUM_BMAP, 1, 1, ip->i_ino, XFS_BTREE_LONG_PTRS); - /* * Need a cursor. Can't allocate until bb_level is filled in. */ @@ -753,8 +763,8 @@ xfs_bmap_extents_to_btree( args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: args.fsbno = *firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -780,17 +790,19 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - dfops->dop_low = true; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return -ENOSPC; + } /* * Allocation can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(*firstblock == NULLFSBLOCK || - args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || - (dfops->dop_low && - args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); + args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); *firstblock = cur->bc_private.b.firstblock = args.fsbno; cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; @@ -801,17 +813,12 @@ try_another_ag: */ abp->b_ops = &xfs_bmbt_buf_ops; ablock = XFS_BUF_TO_BLOCK(abp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, ablock, abp->b_bn, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + xfs_btree_init_block_int(mp, ablock, abp->b_bn, + XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (cnt = i = 0; i < nextents; i++) { ep = xfs_iext_get_ext(ifp, i); if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { @@ -1137,6 +1144,10 @@ xfs_bmap_add_attrfork( goto trans_cancel; if (XFS_IFORK_Q(ip)) goto trans_cancel; + if (ip->i_d.di_anextents != 0) { + error = -EFSCORRUPTED; + goto trans_cancel; + } if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1144,7 +1155,6 @@ xfs_bmap_add_attrfork( ASSERT(ip->i_d.di_aformat == 0); ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } - ASSERT(ip->i_d.di_anextents == 0); xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -1259,7 +1269,6 @@ xfs_bmap_read_extents( /* REFERENCED */ xfs_extnum_t room; /* number of entries there's room for */ - bno = NULLFSBLOCK; mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : @@ -1272,9 +1281,7 @@ xfs_bmap_read_extents( ASSERT(level > 0); pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); - ASSERT(bno != NULLFSBLOCK); - ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); + /* * Go down the tree until leaf level is reached, following the first * pointer (leftmost) at each level. @@ -1296,7 +1303,7 @@ xfs_bmap_read_extents( /* * Here with bp and block set to the leftmost leaf node in the tree. */ - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + room = xfs_iext_count(ifp); i = 0; /* * Loop over all leaf nodes. Copy information to the extent records. @@ -1361,8 +1368,9 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); } - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); + if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) + return -EFSCORRUPTED; + ASSERT(i == xfs_iext_count(ifp)); XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; error0: @@ -1370,97 +1378,6 @@ error0: return -EFSCORRUPTED; } - -/* - * Search the extent records for the entry containing block bno. - * If bno lies in a hole, point to the next entry. If bno lies - * past eof, *eofp will be set, and *prevp will contain the last - * entry (null if none). Else, *lastxp will be set to the index - * of the found entry; *gotp will contain the entry. - */ -STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_multi_extents( - xfs_ifork_t *ifp, /* inode fork pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - xfs_extnum_t lastx; /* last extent index */ - - /* - * Initialize the extent entry structure to catch access to - * uninitialized br_startblock field. - */ - gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; - gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; - gotp->br_state = XFS_EXT_INVALID; - gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; - prevp->br_startoff = NULLFILEOFF; - - ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); - if (lastx > 0) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); - } - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { - xfs_bmbt_get_all(ep, gotp); - *eofp = 0; - } else { - if (lastx > 0) { - *gotp = *prevp; - } - *eofp = 1; - ep = NULL; - } - *lastxp = lastx; - return ep; -} - -/* - * Search the extents list for the inode, for the extent containing bno. - * If bno lies in a hole, point to the next entry. If bno lies past eof, - * *eofp will be set, and *prevp will contain the last entry (null if none). - * Else, *lastxp will be set to the index of the found - * entry; *gotp will contain the entry. - */ -xfs_bmbt_rec_host_t * /* pointer to found extent entry */ -xfs_bmap_search_extents( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_fileoff_t bno, /* block number searched for */ - int fork, /* data or attr fork */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - - XFS_STATS_INC(ip->i_mount, xs_look_exlist); - ifp = XFS_IFORK_PTR(ip, fork); - - ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); - - if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && - !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { - xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, - "Access to block zero in inode %llu " - "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x lastx: %x", - (unsigned long long)ip->i_ino, - (unsigned long long)gotp->br_startblock, - (unsigned long long)gotp->br_startoff, - (unsigned long long)gotp->br_blockcount, - gotp->br_state, *lastxp); - *lastxp = NULLEXTNUM; - *eofp = 1; - return NULL; - } - return ep; -} - /* * Returns the file-relative block number of the first unused block(s) * in the file with at least "len" logically contiguous blocks free. @@ -1497,7 +1414,7 @@ xfs_bmap_first_unused( (error = xfs_iread_extents(tp, ip, whichfork))) return error; lowest = *first_unused; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); off = xfs_bmbt_get_startoff(ep); @@ -1523,44 +1440,44 @@ xfs_bmap_first_unused( */ int /* error */ xfs_bmap_last_before( - xfs_trans_t *tp, /* transaction pointer */ - xfs_inode_t *ip, /* incore inode */ - xfs_fileoff_t *last_block, /* last block */ - int whichfork) /* data or attr fork */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t *last_block, /* last block */ + int whichfork) /* data or attr fork */ { - xfs_fileoff_t bno; /* input file offset */ - int eof; /* hit end of file */ - xfs_bmbt_rec_host_t *ep; /* pointer to last extent */ - int error; /* error return value */ - xfs_bmbt_irec_t got; /* current extent value */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t lastx; /* last extent used */ - xfs_bmbt_irec_t prev; /* previous extent value */ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec got; + xfs_extnum_t idx; + int error; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) - return -EIO; - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_LOCAL: *last_block = 0; return 0; + case XFS_DINODE_FMT_BTREE: + case XFS_DINODE_FMT_EXTENTS: + break; + default: + return -EIO; } - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!(ifp->if_flags & XFS_IFEXTENTS) && - (error = xfs_iread_extents(tp, ip, whichfork))) - return error; - bno = *last_block - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); - if (eof || xfs_bmbt_get_startoff(ep) > bno) { - if (prev.br_startoff == NULLFILEOFF) - *last_block = 0; - else - *last_block = prev.br_startoff + prev.br_blockcount; + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; } - /* - * Otherwise *last_block is already the right answer. - */ + + if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) { + if (got.br_startoff <= *last_block - 1) + return 0; + } + + if (xfs_iext_get_extent(ifp, idx - 1, &got)) { + *last_block = got.br_startoff + got.br_blockcount; + return 0; + } + + *last_block = 0; return 0; } @@ -1582,7 +1499,7 @@ xfs_bmap_last_extent( return error; } - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *is_empty = 1; return 0; @@ -1735,7 +1652,7 @@ xfs_bmap_add_extent_delay_real( &bma->ip->i_d.di_nextents); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -1794,7 +1711,7 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (bma->idx < xfs_iext_count(ifp) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); @@ -1935,6 +1852,7 @@ xfs_bmap_add_extent_delay_real( */ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmbt_set_state(ep, new->br_state); trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); (*nextents)++; @@ -2273,6 +2191,7 @@ STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( struct xfs_trans *tp, xfs_inode_t *ip, /* incore inode pointer */ + int whichfork, xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ @@ -2292,15 +2211,17 @@ xfs_bmap_add_extent_unwritten_real( /* left is 0, right is 1, prev is 2 */ int rval=0; /* return value (logging flags) */ int state = 0;/* state bits, accessed thru macros */ - struct xfs_mount *mp = tp->t_mountp; + struct xfs_mount *mp = ip->i_mount; *logflagsp = 0; cur = *curp; - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + ifp = XFS_IFORK_PTR(ip, whichfork); + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ASSERT(*idx >= 0); - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2356,7 +2277,7 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < xfs_iext_count(ifp) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) @@ -2396,7 +2317,8 @@ xfs_bmap_add_extent_unwritten_real( trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx + 1, 2, state); - ip->i_d.di_nextents -= 2; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 2); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2439,7 +2361,8 @@ xfs_bmap_add_extent_unwritten_real( trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2474,7 +2397,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_bmbt_set_state(ep, newext); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_remove(ip, *idx + 1, 1, state); - ip->i_d.di_nextents--; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2586,7 +2510,8 @@ xfs_bmap_add_extent_unwritten_real( trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_insert(ip, *idx, 1, new, state); - ip->i_d.di_nextents++; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2664,7 +2589,8 @@ xfs_bmap_add_extent_unwritten_real( ++*idx; xfs_iext_insert(ip, *idx, 1, new, state); - ip->i_d.di_nextents++; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2712,7 +2638,8 @@ xfs_bmap_add_extent_unwritten_real( ++*idx; xfs_iext_insert(ip, *idx, 2, &r[0], state); - ip->i_d.di_nextents += 2; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 2); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2766,17 +2693,17 @@ xfs_bmap_add_extent_unwritten_real( } /* update reverse mappings */ - error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); + error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); if (error) goto done; /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { + if (xfs_bmap_needs_btree(ip, whichfork)) { int tmp_logflags; /* partial log flag return val */ ASSERT(cur == NULL); error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, - 0, &tmp_logflags, XFS_DATA_FORK); + 0, &tmp_logflags, whichfork); *logflagsp |= tmp_logflags; if (error) goto done; @@ -2788,7 +2715,7 @@ xfs_bmap_add_extent_unwritten_real( *curp = cur; } - xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); + xfs_bmap_check_leaf_extents(*curp, ip, whichfork); done: *logflagsp |= rval; return error; @@ -2836,7 +2763,7 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); @@ -2880,7 +2807,8 @@ xfs_bmap_add_extent_hole_delay( oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + oldlen); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); @@ -2901,7 +2829,8 @@ xfs_bmap_add_extent_hole_delay( xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + oldlen); xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); @@ -2917,7 +2846,8 @@ xfs_bmap_add_extent_hole_delay( temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); - newlen = xfs_bmap_worst_indlen(ip, temp); + newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + oldlen); xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, nullstartblock((int)newlen), temp, right.br_state); @@ -2966,17 +2896,18 @@ xfs_bmap_add_extent_hole_real( ifp = XFS_IFORK_PTR(bma->ip, whichfork); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - ASSERT(whichfork != XFS_COW_FORK); XFS_STATS_INC(mp, xs_add_exlist); state = 0; if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; /* * Check and set flags if this segment has a left neighbor. @@ -2992,7 +2923,7 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (bma->idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); if (isnullstartblock(right.br_startblock)) @@ -3700,7 +3631,7 @@ xfs_bmap_btalloc( align = xfs_get_cowextsz_hint(ap->ip); else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); - if (unlikely(align)) { + if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, &ap->length); @@ -3772,7 +3703,7 @@ xfs_bmap_btalloc( args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { + if (align) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); @@ -3883,7 +3814,6 @@ xfs_bmap_btalloc( args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; args.total = ap->minlen; - args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; ap->dfops->dop_low = true; @@ -3894,17 +3824,13 @@ xfs_bmap_btalloc( * the first block that was allocated. */ ASSERT(*ap->firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *ap->firstblock) == - XFS_FSB_TO_AGNO(mp, args.fsbno) || - (ap->dfops->dop_low && - XFS_FSB_TO_AGNO(mp, *ap->firstblock) < - XFS_FSB_TO_AGNO(mp, args.fsbno))); + XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= + XFS_FSB_TO_AGNO(mp, args.fsbno)); ap->blkno = args.fsbno; if (*ap->firstblock == NULLFSBLOCK) *ap->firstblock = args.fsbno; - ASSERT(nullfb || fb_agno == args.agno || - (ap->dfops->dop_low && fb_agno < args.agno)); + ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; if (!(ap->flags & XFS_BMAPI_COWFORK)) ap->ip->i_d.di_nblocks += args.len; @@ -4145,12 +4071,11 @@ xfs_bmapi_read( struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec prev; xfs_fileoff_t obno; xfs_fileoff_t end; - xfs_extnum_t lastx; + xfs_extnum_t idx; int error; - int eof; + bool eof = false; int n = 0; int whichfork = xfs_bmapi_whichfork(flags); @@ -4190,7 +4115,8 @@ xfs_bmapi_read( return error; } - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev); + if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) + eof = true; end = bno + len; obno = bno; @@ -4221,23 +4147,34 @@ xfs_bmapi_read( break; /* Else go on to the next record. */ - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); - else - eof = 1; + if (!xfs_iext_get_extent(ifp, ++idx, &got)) + eof = true; } *nmap = n; return 0; } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. + * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, + xfs_fileoff_t off, xfs_filblks_t len, + xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof) { @@ -4248,10 +4185,17 @@ xfs_bmapi_reserve_delalloc( char rt = XFS_IS_REALTIME_INODE(ip); xfs_extlen_t extsz; int error; + xfs_fileoff_t aoff = off; - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + /* + * Cap the alloc length. Keep track of prealloc so we know whether to + * tag the inode before we return. + */ + alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + if (prealloc && alen >= len) + prealloc = alen - len; /* Figure out the extent size, adjust alen */ if (whichfork == XFS_COW_FORK) @@ -4259,7 +4203,12 @@ xfs_bmapi_reserve_delalloc( else extsz = xfs_get_extsz_hint(ip); if (extsz) { - error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, + struct xfs_bmbt_irec prev; + + if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) + prev.br_startoff = NULLFILEOFF; + + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); } @@ -4304,18 +4253,19 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. + * Tag the inode if blocks were preallocated. Note that COW fork + * preallocation can occur at the start or end of the extent, even when + * prealloc == 0, so we must also check the aligned offset and length. */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + if (whichfork == XFS_DATA_FORK && prealloc) + xfs_inode_set_eofblocks_tag(ip); + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) + xfs_inode_set_cowblocks_tag(ip); - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); return 0; out_unreserve_blocks: @@ -4349,7 +4299,7 @@ xfs_bmapi_allocate( if (bma->wasdel) { bma->length = (xfs_extlen_t)bma->got.br_blockcount; bma->offset = bma->got.br_startoff; - if (bma->idx != NULLEXTNUM && bma->idx) { + if (bma->idx) { xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &bma->prev); } @@ -4395,8 +4345,6 @@ xfs_bmapi_allocate( if (error) return error; - if (bma->dfops->dop_low) - bma->minleft = 0; if (bma->cur) bma->cur->bc_private.b.firstblock = *bma->firstblock; if (bma->blkno == NULLFSBLOCK) @@ -4422,10 +4370,16 @@ xfs_bmapi_allocate( bma->got.br_state = XFS_EXT_NORM; /* - * A wasdelay extent has been initialized, so shouldn't be flagged - * as unwritten. + * In the data fork, a wasdelay extent has been initialized, so + * shouldn't be flagged as unwritten. + * + * For the cow fork, however, we convert delalloc reservations + * (extents allocated for speculative preallocation) to + * allocated unwritten extents, and only convert the unwritten + * extents to real extents when we're about to write the data. */ - if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && + if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && + (bma->flags & XFS_BMAPI_PREALLOC) && xfs_sb_version_hasextflgbit(&mp->m_sb)) bma->got.br_state = XFS_EXT_UNWRITTEN; @@ -4476,8 +4430,6 @@ xfs_bmapi_convert_unwritten( (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) return 0; - ASSERT(whichfork != XFS_COW_FORK); - /* * Modify (by adding) the state flag, if writing. */ @@ -4502,8 +4454,8 @@ xfs_bmapi_convert_unwritten( return error; } - error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, - &bma->cur, mval, bma->firstblock, bma->dfops, + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, + &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops, &tmp_logflags); /* * Log the inode core unconditionally in the unwritten extent conversion @@ -4512,8 +4464,12 @@ xfs_bmapi_convert_unwritten( * in the transaction for the sake of fsync(), even if nothing has * changed, because fsync() will not force the log for this transaction * unless it sees the inode pinned. + * + * Note: If we're only converting cow fork extents, there aren't + * any on-disk updates to make, so we don't need to log anything. */ - bma->logflags |= tmp_logflags | XFS_ILOG_CORE; + if (whichfork != XFS_COW_FORK) + bma->logflags |= tmp_logflags | XFS_ILOG_CORE; if (error) return error; @@ -4563,13 +4519,11 @@ xfs_bmapi_write( struct xfs_ifork *ifp; struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ xfs_fileoff_t end; /* end of mapped file region */ - int eof; /* after the end of extents */ + bool eof = false; /* after the end of extents */ int error; /* error return */ int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4589,15 +4543,15 @@ xfs_bmapi_write( ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(!(flags & XFS_BMAPI_IGSTATE)); - ASSERT(tp != NULL); + ASSERT(tp != NULL || + (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == + (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); ASSERT(len > 0); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK); - ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK); /* zeroing is for currently only for data extents, not metadata */ ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != @@ -4641,12 +4595,14 @@ xfs_bmapi_write( goto error0; } - xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got, - &bma.prev); n = 0; end = bno + len; obno = bno; + if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got)) + eof = true; + if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev)) + bma.prev.br_startoff = NULLFILEOFF; bma.tp = tp; bma.ip = ip; bma.total = total; @@ -4655,22 +4611,44 @@ xfs_bmapi_write( bma.firstblock = firstblock; while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + bool need_alloc = false, wasdelay = false; - /* - * Make sure we only reflink into a hole. - */ - if (flags & XFS_BMAPI_REMAP) - ASSERT(inhole); - if (flags & XFS_BMAPI_COWFORK) - ASSERT(!inhole); + /* in hole or beyoned EOF? */ + if (eof || bma.got.br_startoff > bno) { + if (flags & XFS_BMAPI_DELALLOC) { + /* + * For the COW fork we can reasonably get a + * request for converting an extent that races + * with other threads already having converted + * part of it, as there converting COW to + * regular blocks is not protected using the + * IOLOCK. + */ + ASSERT(flags & XFS_BMAPI_COWFORK); + if (!(flags & XFS_BMAPI_COWFORK)) { + error = -EIO; + goto error0; + } + + if (eof || bno >= end) + break; + } else { + need_alloc = true; + } + } else { + /* + * Make sure we only reflink into a hole. + */ + ASSERT(!(flags & XFS_BMAPI_REMAP)); + if (isnullstartblock(bma.got.br_startblock)) + wasdelay = true; + } /* * First, deal with the hole before the allocated space * that we found, if any. */ - if (inhole || wasdelay) { + if (need_alloc || wasdelay) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; @@ -4733,11 +4711,8 @@ xfs_bmapi_write( /* Else go on to the next record. */ bma.prev = bma.got; - if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), - &bma.got); - } else - eof = 1; + if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got)) + eof = true; } *nmap = n; @@ -4781,13 +4756,9 @@ error0: if (bma.cur) { if (!error) { ASSERT(*firstblock == NULLFSBLOCK || - XFS_FSB_TO_AGNO(mp, *firstblock) == + XFS_FSB_TO_AGNO(mp, *firstblock) <= XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock) || - (dfops->dop_low && - XFS_FSB_TO_AGNO(mp, *firstblock) < - XFS_FSB_TO_AGNO(mp, - bma.cur->bc_private.b.firstblock))); + bma.cur->bc_private.b.firstblock)); *firstblock = bma.cur->bc_private.b.firstblock; } xfs_btree_del_cursor(bma.cur, @@ -4822,34 +4793,59 @@ xfs_bmap_split_indlen( xfs_filblks_t len2 = *indlen2; xfs_filblks_t nres = len1 + len2; /* new total res. */ xfs_filblks_t stolen = 0; + xfs_filblks_t resfactor; /* * Steal as many blocks as we can to try and satisfy the worst case * indlen for both new extents. */ - while (nres > ores && avail) { - nres--; - avail--; - stolen++; - } + if (ores < nres && avail) + stolen = XFS_FILBLKS_MIN(nres - ores, avail); + ores += stolen; + + /* nothing else to do if we've satisfied the new reservation */ + if (ores >= nres) + return stolen; /* - * The only blocks available are those reserved for the original - * extent and what we can steal from the extent being removed. - * If this still isn't enough to satisfy the combined - * requirements for the two new extents, skim blocks off of each - * of the new reservations until they match what is available. + * We can't meet the total required reservation for the two extents. + * Calculate the percent of the overall shortage between both extents + * and apply this percentage to each of the requested indlen values. + * This distributes the shortage fairly and reduces the chances that one + * of the two extents is left with nothing when extents are repeatedly + * split. */ - while (nres > ores) { - if (len1) { - len1--; - nres--; + resfactor = (ores * 100); + do_div(resfactor, nres); + len1 *= resfactor; + do_div(len1, 100); + len2 *= resfactor; + do_div(len2, 100); + ASSERT(len1 + len2 <= ores); + ASSERT(len1 < *indlen1 && len2 < *indlen2); + + /* + * Hand out the remainder to each extent. If one of the two reservations + * is zero, we want to make sure that one gets a block first. The loop + * below starts with len1, so hand len2 a block right off the bat if it + * is zero. + */ + ores -= (len1 + len2); + ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); + if (ores && !len2 && *indlen2) { + len2++; + ores--; + } + while (ores) { + if (len1 < *indlen1) { + len1++; + ores--; } - if (nres == ores) + if (!ores) break; - if (len2) { - len2--; - nres--; + if (len2 < *indlen2) { + len2++; + ores--; } } @@ -4885,7 +4881,7 @@ xfs_bmap_del_extent_delay( da_new = 0; ASSERT(*idx >= 0); - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -4902,8 +4898,11 @@ xfs_bmap_del_extent_delay( * sb counters as we might have to borrow some blocks for the * indirect block accounting. */ - xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, + error = xfs_trans_reserve_quota_nblks(NULL, ip, + -((long)del->br_blockcount), 0, isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + if (error) + return error; ip->i_delayed_blks -= del->br_blockcount; if (whichfork == XFS_COW_FORK) @@ -5013,7 +5012,7 @@ xfs_bmap_del_extent_cow( got_endoff = got->br_startoff + got->br_blockcount; ASSERT(*idx >= 0); - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -5119,8 +5118,7 @@ xfs_bmap_del_extent( state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp))); ASSERT(del->br_blockcount > 0); ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); @@ -5434,8 +5432,6 @@ __xfs_bunmapi( { xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_bmbt_irec_t del; /* extent being deleted */ - int eof; /* is deleting at eof */ - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ int error; /* error return value */ xfs_extnum_t extno; /* extent number in list */ xfs_bmbt_irec_t got; /* current extent record */ @@ -5445,8 +5441,6 @@ __xfs_bunmapi( int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ - xfs_bmbt_irec_t prev; /* previous extent record */ xfs_fileoff_t start; /* first file offset deleted */ int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ @@ -5477,8 +5471,7 @@ __xfs_bunmapi( if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { + if (xfs_iext_count(ifp) == 0) { *rlen = 0; return 0; } @@ -5486,18 +5479,17 @@ __xfs_bunmapi( isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); start = bno; bno = start + len - 1; - ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, - &prev); /* * Check to see if the given block number is past the end of the * file, back up to the last block if so... */ - if (eof) { - ep = xfs_iext_get_ext(ifp, --lastx); - xfs_bmbt_get_all(ep, &got); + if (!xfs_iext_lookup_extent(ip, ifp, bno, &lastx, &got)) { + ASSERT(lastx > 0); + xfs_iext_get_extent(ifp, --lastx, &got); bno = got.br_startoff + got.br_blockcount - 1; } + logflags = 0; if (ifp->if_flags & XFS_IFBROOT) { ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); @@ -5528,8 +5520,7 @@ __xfs_bunmapi( if (got.br_startoff > bno) { if (--lastx < 0) break; - ep = xfs_iext_get_ext(ifp, lastx); - xfs_bmbt_get_all(ep, &got); + xfs_iext_get_extent(ifp, lastx, &got); } /* * Is the last block of this extent before the range @@ -5543,7 +5534,6 @@ __xfs_bunmapi( * Then deal with the (possibly delayed) allocated space * we found. */ - ASSERT(ep != NULL); del = got; wasdel = isnullstartblock(del.br_startblock); if (got.br_startoff < start) { @@ -5597,8 +5587,8 @@ __xfs_bunmapi( } del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, ip, - &lastx, &cur, &del, firstblock, dfops, - &logflags); + whichfork, &lastx, &cur, &del, + firstblock, dfops, &logflags); if (error) goto error0; goto nodelete; @@ -5624,15 +5614,12 @@ __xfs_bunmapi( */ ASSERT(bno >= del.br_blockcount); bno -= del.br_blockcount; - if (got.br_startoff > bno) { - if (--lastx >= 0) { - ep = xfs_iext_get_ext(ifp, - lastx); - xfs_bmbt_get_all(ep, &got); - } - } + if (got.br_startoff > bno && --lastx >= 0) + xfs_iext_get_extent(ifp, lastx, &got); continue; } else if (del.br_state == XFS_EXT_UNWRITTEN) { + struct xfs_bmbt_irec prev; + /* * This one is already unwritten. * It must have a written left neighbor. @@ -5640,8 +5627,7 @@ __xfs_bunmapi( * try again. */ ASSERT(lastx > 0); - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - lastx - 1), &prev); + xfs_iext_get_extent(ifp, lastx - 1, &prev); ASSERT(prev.br_state == XFS_EXT_NORM); ASSERT(!isnullstartblock(prev.br_startblock)); ASSERT(del.br_startblock == @@ -5655,8 +5641,9 @@ __xfs_bunmapi( prev.br_state = XFS_EXT_UNWRITTEN; lastx--; error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &prev, - firstblock, dfops, &logflags); + ip, whichfork, &lastx, &cur, + &prev, firstblock, dfops, + &logflags); if (error) goto error0; goto nodelete; @@ -5664,8 +5651,9 @@ __xfs_bunmapi( ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent_unwritten_real(tp, - ip, &lastx, &cur, &del, - firstblock, dfops, &logflags); + ip, whichfork, &lastx, &cur, + &del, firstblock, dfops, + &logflags); if (error) goto error0; goto nodelete; @@ -5739,13 +5727,9 @@ nodelete: */ if (bno != (xfs_fileoff_t)-1 && bno >= start) { if (lastx >= 0) { - ep = xfs_iext_get_ext(ifp, lastx); - if (xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, - lastx); - } - xfs_bmbt_get_all(ep, &got); + xfs_iext_get_extent(ifp, lastx, &got); + if (got.br_startoff > bno && --lastx >= 0) + xfs_iext_get_extent(ifp, lastx, &got); } extno++; } @@ -5963,7 +5947,7 @@ xfs_bmse_shift_one( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); xfs_bmbt_get_all(gotp, &got); @@ -6140,7 +6124,7 @@ xfs_bmap_shift_extents( * are collapsing out, so we cannot use the count of real extents here. * Instead we have to calculate it from the incore fork. */ - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); if (total_extents == 0) { *done = 1; goto del_cursor; @@ -6200,7 +6184,7 @@ xfs_bmap_shift_extents( * count can change. Update the total and grade the next record. */ if (direction == SHIFT_LEFT) { - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); stop_extent = total_extents; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 7cae6ec27fa6..cdef87db5262 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -110,6 +110,9 @@ struct xfs_extent_free_item /* Map something in the CoW fork. */ #define XFS_BMAPI_COWFORK 0x200 +/* Only convert delalloc space, don't allocate entirely new extents */ +#define XFS_BMAPI_DELALLOC 0x400 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -120,7 +123,8 @@ struct xfs_extent_free_item { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ - { XFS_BMAPI_COWFORK, "COWFORK" } + { XFS_BMAPI_COWFORK, "COWFORK" }, \ + { XFS_BMAPI_DELALLOC, "DELALLOC" } static inline int xfs_bmapi_aflag(int w) @@ -237,14 +241,9 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_defer_ops *dfops, enum shift_direction direction, int num_exts); int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); -struct xfs_bmbt_rec_host * - xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno, - int fork, int *eofp, xfs_extnum_t *lastxp, - struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, xfs_filblks_t len, - struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev, - xfs_extnum_t *lastx, int eof); + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, + struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 8007d2ba9aef..fd55db479385 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -71,15 +71,9 @@ xfs_bmdr_to_bmbt( xfs_bmbt_key_t *tkp; __be64 *tpp; - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BMAP_MAGIC, 0, 0, ip->i_ino, + xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, + XFS_BTNUM_BMAP, 0, 0, ip->i_ino, XFS_BTREE_LONG_PTRS); - rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; @@ -453,8 +447,8 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: /* * Make sure there is sufficient room left in the AG to * complete a full tree split for an extent insert. If @@ -494,26 +488,25 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - cur->bc_private.b.dfops->dop_low = true; args.fsbno = cur->bc_private.b.firstblock; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if + * a full btree split. Try again and if * successful activate the lowspace algorithm. */ args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; error = xfs_alloc_vextent(&args); if (error) goto error0; cur->bc_private.b.dfops->dop_low = true; } - if (args.fsbno == NULLFSBLOCK) { + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; @@ -796,13 +789,14 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; cur->bc_btnum = XFS_BTNUM_BMAP; cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); cur->bc_ops = &xfs_bmbt_ops; cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 0e80993c8a59..c3decedc9455 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -50,8 +50,18 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, XFS_REFC_CRC_MAGIC } }; -#define xfs_btree_magic(cur) \ - xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] + +__uint32_t +xfs_btree_magic( + int crc, + xfs_btnum_t btnum) +{ + __uint32_t magic = xfs_magics[crc][btnum]; + + /* Ensure we asked for crc for crc-only magics. */ + ASSERT(magic != 0); + return magic; +} STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( @@ -62,10 +72,13 @@ xfs_btree_check_lblock( { int lblock_ok = 1; /* block passes checks */ struct xfs_mount *mp; /* file system mount point */ + xfs_btnum_t btnum = cur->bc_btnum; + int crc; mp = cur->bc_mp; + crc = xfs_sb_version_hascrc(&mp->m_sb); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (crc) { lblock_ok = lblock_ok && uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid) && @@ -74,7 +87,7 @@ xfs_btree_check_lblock( } lblock_ok = lblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && @@ -110,13 +123,16 @@ xfs_btree_check_sblock( struct xfs_agf *agf; /* ag. freespace structure */ xfs_agblock_t agflen; /* native ag. freespace length */ int sblock_ok = 1; /* block passes checks */ + xfs_btnum_t btnum = cur->bc_btnum; + int crc; mp = cur->bc_mp; + crc = xfs_sb_version_hascrc(&mp->m_sb); agbp = cur->bc_private.a.agbp; agf = XFS_BUF_TO_AGF(agbp); agflen = be32_to_cpu(agf->agf_length); - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (crc) { sblock_ok = sblock_ok && uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid) && @@ -125,7 +141,7 @@ xfs_btree_check_sblock( } sblock_ok = sblock_ok && - be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && @@ -810,7 +826,8 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - ASSERT(fsbno != NULLFSBLOCK); + if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, mp->m_bsize, lock, &bp, ops); @@ -1084,12 +1101,15 @@ xfs_btree_init_block_int( struct xfs_mount *mp, struct xfs_btree_block *buf, xfs_daddr_t blkno, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, unsigned int flags) { + int crc = xfs_sb_version_hascrc(&mp->m_sb); + __u32 magic = xfs_btree_magic(crc, btnum); + buf->bb_magic = cpu_to_be32(magic); buf->bb_level = cpu_to_be16(level); buf->bb_numrecs = cpu_to_be16(numrecs); @@ -1097,7 +1117,7 @@ xfs_btree_init_block_int( if (flags & XFS_BTREE_LONG_PTRS) { buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { + if (crc) { buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); buf->bb_u.l.bb_owner = cpu_to_be64(owner); uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); @@ -1110,7 +1130,7 @@ xfs_btree_init_block_int( buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); - if (flags & XFS_BTREE_CRC_BLOCKS) { + if (crc) { buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); buf->bb_u.s.bb_owner = cpu_to_be32(__owner); uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); @@ -1123,14 +1143,14 @@ void xfs_btree_init_block( struct xfs_mount *mp, struct xfs_buf *bp, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, unsigned int flags) { xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - magic, level, numrecs, owner, flags); + btnum, level, numrecs, owner, flags); } STATIC void @@ -1140,7 +1160,7 @@ xfs_btree_init_block_cur( int level, int numrecs) { - __u64 owner; + __u64 owner; /* * we can pull the owner from the cursor right now as the different @@ -1154,7 +1174,7 @@ xfs_btree_init_block_cur( owner = cur->bc_private.a.agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - xfs_btree_magic(cur), level, numrecs, + cur->bc_btnum, level, numrecs, owner, cur->bc_flags); } @@ -1769,8 +1789,28 @@ xfs_btree_lookup_get_block( if (error) return error; + /* Check the inode owner since the verifiers don't. */ + if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && + (cur->bc_flags & XFS_BTREE_LONG_PTRS) && + be64_to_cpu((*blkp)->bb_u.l.bb_owner) != + cur->bc_private.b.ip->i_ino) + goto out_bad; + + /* Did we get the level we were looking for? */ + if (be16_to_cpu((*blkp)->bb_level) != level) + goto out_bad; + + /* Check that internal nodes have at least one record. */ + if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) + goto out_bad; + xfs_btree_setbuf(cur, level, bp); return 0; + +out_bad: + *blkp = NULL; + xfs_trans_brelse(cur->bc_tp, bp); + return -EFSCORRUPTED; } /* diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index c2b01d1c79ee..4bb62580a7fd 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -76,6 +76,8 @@ union xfs_btree_rec { #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) #define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) +__uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); + /* * For logging record fields. */ @@ -96,46 +98,10 @@ union xfs_btree_rec { /* * Generic stats interface */ -#define __XFS_BTREE_STATS_INC(mp, type, stat) \ - XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat) #define XFS_BTREE_STATS_INC(cur, stat) \ -do { \ - struct xfs_mount *__mp = cur->bc_mp; \ - switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ - case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \ - case XFS_BTNUM_REFC: __XFS_BTREE_STATS_INC(__mp, refcbt, stat); break; \ - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ - } \ -} while (0) - -#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \ - XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val) -#define XFS_BTREE_STATS_ADD(cur, stat, val) \ -do { \ - struct xfs_mount *__mp = cur->bc_mp; \ - switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: \ - __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \ - case XFS_BTNUM_CNT: \ - __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \ - case XFS_BTNUM_BMAP: \ - __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \ - case XFS_BTNUM_INO: \ - __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ - case XFS_BTNUM_FINO: \ - __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ - case XFS_BTNUM_RMAP: \ - __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \ - case XFS_BTNUM_REFC: \ - __XFS_BTREE_STATS_ADD(__mp, refcbt, stat, val); break; \ - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ - } \ -} while (0) + XFS_STATS_INC_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat) +#define XFS_BTREE_STATS_ADD(cur, stat, val) \ + XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) #define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */ @@ -253,6 +219,7 @@ typedef struct xfs_btree_cur __uint8_t bc_nlevels; /* number of levels in the tree */ __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ xfs_btnum_t bc_btnum; /* identifies which btree type */ + int bc_statoff; /* offset of btre stats array */ union { struct { /* needed for BNO, CNT, INO */ struct xfs_buf *agbp; /* agf/agi buffer pointer */ @@ -413,7 +380,7 @@ void xfs_btree_init_block( struct xfs_mount *mp, struct xfs_buf *bp, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, @@ -424,7 +391,7 @@ xfs_btree_init_block_int( struct xfs_mount *mp, struct xfs_btree_block *buf, xfs_daddr_t blkno, - __u32 magic, + xfs_btnum_t btnum, __u16 level, __u16 numrecs, __u64 owner, @@ -491,7 +458,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) #define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ + (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) /* diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h index fad1676ad8cd..a416c7cb23ea 100644 --- a/fs/xfs/libxfs/xfs_cksum.h +++ b/fs/xfs/libxfs/xfs_cksum.h @@ -6,10 +6,11 @@ /* * Calculate the intermediate checksum for a buffer that has the CRC field * inside it. The offset of the 32bit crc fields is passed as the - * cksum_offset parameter. + * cksum_offset parameter. We do not modify the buffer during verification, + * hence we have to split the CRC calculation across the cksum_offset. */ static inline __uint32_t -xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset) +xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset) { __uint32_t zero = 0; __uint32_t crc; @@ -26,6 +27,20 @@ xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset) } /* + * Fast CRC method where the buffer is modified. Callers must have exclusive + * access to the buffer while the calculation takes place. + */ +static inline __uint32_t +xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset) +{ + /* zero the CRC field */ + *(__le32 *)(buffer + cksum_offset) = 0; + + /* single pass CRC calculation for the entire buffer */ + return crc32c(XFS_CRC_SEED, buffer, length); +} + +/* * Convert the intermediate checksum to the final ondisk format. * * The CRC32c calculation uses LE format even on BE machines, but returns the @@ -40,11 +55,14 @@ xfs_end_cksum(__uint32_t crc) /* * Helper to generate the checksum for a buffer. + * + * This modifies the buffer temporarily - callers must have exclusive + * access to the buffer while the calculation takes place. */ static inline void xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) { - __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); + __uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset); *(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc); } @@ -55,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) static inline int xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset) { - __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); + __uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset); return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc); } diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index f2dc1a950c85..1bdf2888295b 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2633,7 +2633,7 @@ out_free: /* * Readahead the dir/attr block. */ -xfs_daddr_t +int xfs_da_reada_buf( struct xfs_inode *dp, xfs_dablk_t bno, @@ -2664,7 +2664,5 @@ out_free: if (mapp != &map) kmem_free(mapp); - if (error) - return -1; - return mappedbno; + return error; } diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 98c75cbe6ac2..4e29cb6a3627 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int whichfork, const struct xfs_buf_ops *ops); -xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, +int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, int whichfork, const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 20a96dd5af7e..2f389d366e93 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -36,21 +36,29 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; /* - * @mode, if set, indicates that the type field needs to be set up. - * This uses the transformation from file mode to DT_* as defined in linux/fs.h - * for file type specification. This will be propagated into the directory - * structure if appropriate for the given operation and filesystem config. + * Convert inode mode to directory entry filetype */ -const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { - [0] = XFS_DIR3_FT_UNKNOWN, - [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, - [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, -}; +unsigned char xfs_mode_to_ftype(int mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + return XFS_DIR3_FT_REG_FILE; + case S_IFDIR: + return XFS_DIR3_FT_DIR; + case S_IFCHR: + return XFS_DIR3_FT_CHRDEV; + case S_IFBLK: + return XFS_DIR3_FT_BLKDEV; + case S_IFIFO: + return XFS_DIR3_FT_FIFO; + case S_IFSOCK: + return XFS_DIR3_FT_SOCK; + case S_IFLNK: + return XFS_DIR3_FT_SYMLINK; + default: + return XFS_DIR3_FT_UNKNOWN; + } +} /* * ASCII case-insensitive (ie. A-Z) support for directories that was @@ -93,7 +101,7 @@ xfs_ascii_ci_compname( return result; } -static struct xfs_nameops xfs_ascii_ci_nameops = { +static const struct xfs_nameops xfs_ascii_ci_nameops = { .hashname = xfs_ascii_ci_hashname, .compname = xfs_ascii_ci_compname, }; @@ -631,7 +639,8 @@ xfs_dir2_isblock( if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; - ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); + if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) + return -EFSCORRUPTED; *vp = rval; return 0; } diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index becc926c3e3d..d6e6d9d16f6c 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -18,6 +18,9 @@ #ifndef __XFS_DIR2_H__ #define __XFS_DIR2_H__ +#include "xfs_da_format.h" +#include "xfs_da_btree.h" + struct xfs_defer_ops; struct xfs_da_args; struct xfs_inode; @@ -32,10 +35,9 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; /* - * directory filetype conversion tables. + * Convert inode mode to directory entry filetype */ -#define S_SHIFT 12 -extern const unsigned char xfs_mode_to_ftype[]; +extern unsigned char xfs_mode_to_ftype(int mode); /* * directory operations vector for encode/decode routines @@ -157,6 +159,9 @@ extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_buf *bp); +extern void xfs_dir2_data_freescan_int(struct xfs_da_geometry *geo, + const struct xfs_dir_ops *ops, + struct xfs_dir2_data_hdr *hdr, int *loghead); extern void xfs_dir2_data_freescan(struct xfs_inode *dp, struct xfs_dir2_data_hdr *hdr, int *loghead); extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, @@ -177,6 +182,8 @@ extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup); +extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); + extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops; extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 725fc7841fde..d478065b9544 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -329,7 +329,7 @@ xfs_dir3_data_read( err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } @@ -505,8 +505,9 @@ xfs_dir2_data_freeremove( * Given a data block, reconstruct its bestfree map. */ void -xfs_dir2_data_freescan( - struct xfs_inode *dp, +xfs_dir2_data_freescan_int( + struct xfs_da_geometry *geo, + const struct xfs_dir_ops *ops, struct xfs_dir2_data_hdr *hdr, int *loghead) { @@ -516,7 +517,6 @@ xfs_dir2_data_freescan( struct xfs_dir2_data_free *bf; char *endp; /* end of block's data */ char *p; /* current entry pointer */ - struct xfs_da_geometry *geo = dp->i_mount->m_dir_geo; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -526,13 +526,13 @@ xfs_dir2_data_freescan( /* * Start by clearing the table. */ - bf = dp->d_ops->data_bestfree_p(hdr); + bf = ops->data_bestfree_p(hdr); memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); *loghead = 1; /* * Set up pointers. */ - p = (char *)dp->d_ops->data_entry_p(hdr); + p = (char *)ops->data_entry_p(hdr); if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { btp = xfs_dir2_block_tail_p(geo, hdr); @@ -559,12 +559,22 @@ xfs_dir2_data_freescan( else { dep = (xfs_dir2_data_entry_t *)p; ASSERT((char *)dep - (char *)hdr == - be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); - p += dp->d_ops->data_entsize(dep->namelen); + be16_to_cpu(*ops->data_entry_tag_p(dep))); + p += ops->data_entsize(dep->namelen); } } } +void +xfs_dir2_data_freescan( + struct xfs_inode *dp, + struct xfs_dir2_data_hdr *hdr, + int *loghead) +{ + return xfs_dir2_data_freescan_int(dp->i_mount->m_dir_geo, dp->d_ops, + hdr, loghead); +} + /* * Initialize a data block at the given block number in the directory. * Give back the buffer for the created block. diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 75a557432d0f..bbd1238852b3 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .verify_write = xfs_dir3_free_write_verify, }; +/* Everything ok in the free block header? */ +static bool +xfs_dir3_free_header_check( + struct xfs_inode *dp, + xfs_dablk_t fbno, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + unsigned int firstdb; + int maxbests; + + maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo); + firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) - + xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) * + maxbests; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; + + if (be32_to_cpu(hdr3->firstdb) != firstdb) + return false; + if (be32_to_cpu(hdr3->nvalid) > maxbests) + return false; + if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) + return false; + } else { + struct xfs_dir2_free_hdr *hdr = bp->b_addr; + + if (be32_to_cpu(hdr->firstdb) != firstdb) + return false; + if (be32_to_cpu(hdr->nvalid) > maxbests) + return false; + if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) + return false; + } + return true; +} static int __xfs_dir3_free_read( @@ -168,11 +204,22 @@ __xfs_dir3_free_read( err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, XFS_DATA_FORK, &xfs_dir3_free_buf_ops); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. */ + if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { + xfs_buf_ioerror(*bpp, -EFSCORRUPTED); + xfs_verifier_error(*bpp); + xfs_trans_brelse(tp, *bpp); + return -EFSCORRUPTED; + } /* try read returns without an error or *bpp if it lands in a hole */ - if (!err && tp && *bpp) + if (tp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF); - return err; + + return 0; } int diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index ef9f6ead96a4..39f8604f764e 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -21,7 +21,6 @@ struct dir_context; /* xfs_dir2.c */ -extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, xfs_dir2_db_t *dbp); extern int xfs_dir_cilookup_result(struct xfs_da_args *args, @@ -126,6 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); +extern int xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index c6809ff41197..e84af093b2ab 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -629,6 +629,112 @@ xfs_dir2_sf_check( } #endif /* DEBUG */ +/* Verify the consistency of an inline directory. */ +int +xfs_dir2_sf_verify( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dir2_sf_hdr *sfp; + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + char *endp; + const struct xfs_dir_ops *dops; + struct xfs_ifork *ifp; + xfs_ino_t ino; + int i; + int i8count; + int offset; + int size; + int error; + __uint8_t filetype; + + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + /* + * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops, + * so we can only trust the mountpoint to have the right pointer. + */ + dops = xfs_dir_get_ops(mp, NULL); + + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + /* + * Give up if the directory is way too short. + */ + if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || + size < xfs_dir2_sf_hdr_size(sfp->i8count)) + return -EFSCORRUPTED; + + endp = (char *)sfp + size; + + /* Check .. entry */ + ino = dops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; + offset = dops->data_first_offset; + + /* Check all reported entries */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + /* + * struct xfs_dir2_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + if (((char *)sfep + sizeof(*sfep)) >= endp) + return -EFSCORRUPTED; + + /* Don't allow names with known bad length. */ + if (sfep->namelen == 0) + return -EFSCORRUPTED; + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = dops->sf_nextentry(sfp, sfep); + if (endp < (char *)next_sfep) + return -EFSCORRUPTED; + + /* Check that the offsets always increase. */ + if (xfs_dir2_sf_get_offset(sfep) < offset) + return -EFSCORRUPTED; + + /* Check the inode number. */ + ino = dops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; + + /* Check the file type. */ + filetype = dops->sf_get_ftype(sfep); + if (filetype >= XFS_DIR3_FT_MAX) + return -EFSCORRUPTED; + + offset = xfs_dir2_sf_get_offset(sfep) + + dops->data_entsize(sfep->namelen); + + sfep = next_sfep; + } + if (i8count != sfp->i8count) + return -EFSCORRUPTED; + if ((void *)sfep != (void *)endp) + return -EFSCORRUPTED; + + /* Make sure this whole thing ought to be in local format. */ + if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) + return -EFSCORRUPTED; + + return 0; +} + /* * Create a new (shortform) directory. */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 51b4e0de1fdc..d41ade5d293e 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment( struct xfs_mount *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) return mp->m_sb.sb_inoalignmt; return 1; } @@ -2344,7 +2343,8 @@ xfs_imap( imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); imap->im_len = XFS_FSB_TO_BB(mp, 1); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); + imap->im_boffset = (unsigned short)(offset << + mp->m_sb.sb_inodelog); return 0; } @@ -2372,7 +2372,7 @@ out_map: imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); - imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); + imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); /* * If the inode number maps to a block outside the bounds @@ -2450,8 +2450,6 @@ xfs_ialloc_log_agi( ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); - /* * Compute byte offsets for the first and last fields in the first * region and log the agi buffer. This only logs up through @@ -2512,8 +2510,15 @@ xfs_agi_verify( if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) return false; - if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) + if (be32_to_cpu(agi->agi_level) < 1 || + be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) + return false; + + if (xfs_sb_version_hasfinobt(&mp->m_sb) && + (be32_to_cpu(agi->agi_free_level) < 1 || + be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) return false; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2592,6 +2597,8 @@ xfs_read_agi( XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); if (error) return error; + if (tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF); xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index eab68ae2e011..7c471881c9a6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -82,11 +82,12 @@ xfs_finobt_set_root( } STATIC int -xfs_inobt_alloc_block( +__xfs_inobt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int *stat) + int *stat, + enum xfs_ag_resv_type resv) { xfs_alloc_arg_t args; /* block allocation args */ int error; /* error return value */ @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( args.maxlen = 1; args.prod = 1; args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.resv = resv; error = xfs_alloc_vextent(&args); if (error) { @@ -123,6 +125,27 @@ xfs_inobt_alloc_block( } STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, + XFS_AG_RESV_METADATA); +} + +STATIC int xfs_inobt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, - .alloc_block = xfs_inobt_alloc_block, + .alloc_block = xfs_finobt_alloc_block, .free_block = xfs_inobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, @@ -357,7 +380,7 @@ xfs_inobt_init_cursor( struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; @@ -365,9 +388,11 @@ xfs_inobt_init_cursor( if (btnum == XFS_BTNUM_INO) { cur->bc_nlevels = be32_to_cpu(agi->agi_level); cur->bc_ops = &xfs_inobt_ops; + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); } else { cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); cur->bc_ops = &xfs_finobt_ops; + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); } cur->bc_blocklog = mp->m_sb.sb_blocklog; @@ -478,3 +503,64 @@ xfs_inobt_rec_check_count( return 0; } #endif /* DEBUG */ + +static xfs_extlen_t +xfs_inobt_max_size( + struct xfs_mount *mp) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_inobt_mxr[0] == 0) + return 0; + + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / + XFS_INODES_PER_CHUNK); +} + +static int +xfs_inobt_count_blocks( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_btnum_t btnum, + xfs_extlen_t *tree_blocks) +{ + struct xfs_buf *agbp; + struct xfs_btree_cur *cur; + int error; + + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); + if (error) + return error; + + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); + error = xfs_btree_count_blocks(cur, tree_blocks); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); + + return error; +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + */ +int +xfs_finobt_calc_reserves( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + xfs_extlen_t tree_len = 0; + int error; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); + if (error) + return error; + + *ask += xfs_inobt_max_size(mp); + *used += tree_len; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bd88453217ce..aa81e2e63f3f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #define xfs_inobt_rec_check_count(mp, rec) 0 #endif /* DEBUG */ +int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_extlen_t *ask, xfs_extlen_t *used); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 134424fac434..d93f9d918cfc 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -29,6 +29,7 @@ #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" +#include "xfs_dir2.h" /* * Check that none of the inode's in the buffer have a next @@ -383,15 +384,28 @@ xfs_log_dinode_to_disk( static bool xfs_dinode_verify( struct xfs_mount *mp, - struct xfs_inode *ip, + xfs_ino_t ino, struct xfs_dinode *dip) { + uint16_t mode; uint16_t flags; uint64_t flags2; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return false; + /* don't allow invalid i_size */ + if (be64_to_cpu(dip->di_size) & (1ULL << 63)) + return false; + + mode = be16_to_cpu(dip->di_mode); + if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) + return false; + + /* No zero-length symlinks/dirs. */ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) + return false; + /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return true; @@ -401,7 +415,7 @@ xfs_dinode_verify( if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF)) return false; - if (be64_to_cpu(dip->di_ino) != ip->i_ino) + if (be64_to_cpu(dip->di_ino) != ino) return false; if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) return false; @@ -436,7 +450,7 @@ xfs_dinode_calc_crc( return; ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); - crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, + crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF); dip->di_crc = xfs_end_cksum(crc); } @@ -493,7 +507,7 @@ xfs_iread( return error; /* even unallocated inodes are verified */ - if (!xfs_dinode_verify(mp, ip, dip)) { + if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { xfs_alert(mp, "%s: validation failed for inode %lld failed", __func__, ip->i_ino); diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 3cfe12a4f58a..6848a0afbce7 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -58,8 +58,8 @@ struct xfs_icdinode { */ struct xfs_imap { xfs_daddr_t im_blkno; /* starting BB of inode chunk */ - ushort im_len; /* length in BBs of inode chunk */ - ushort im_boffset; /* inode offset in block in bytes */ + unsigned short im_len; /* length in BBs of inode chunk */ + unsigned short im_boffset; /* inode offset in block in bytes */ }; int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5dd56d3dbb3a..8a37efe04de3 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,12 +26,15 @@ #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" +#include "xfs_btree.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_attr_sf.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_ifork_zone; @@ -209,6 +212,16 @@ xfs_iformat_fork( if (error) return error; + /* Check inline dir contents. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + dip->di_format == XFS_DINODE_FMT_LOCAL) { + error = xfs_dir2_sf_verify(ip); + if (error) { + xfs_idestroy_fork(ip, XFS_DATA_FORK); + return error; + } + } + if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -319,7 +332,6 @@ xfs_iformat_local( int whichfork, int size) { - /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in @@ -429,11 +441,13 @@ xfs_iformat_btree( /* REFERENCED */ int nrecs; int size; + int level; ifp = XFS_IFORK_PTR(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); size = XFS_BMAP_BROOT_SPACE(mp, dfp); nrecs = be16_to_cpu(dfp->bb_numrecs); + level = be16_to_cpu(dfp->bb_level); /* * blow out if -- fork has less extents than can fit in @@ -446,7 +460,8 @@ xfs_iformat_btree( XFS_IFORK_MAXEXT(ip, whichfork) || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { + XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || + level == 0 || level > XFS_BTREE_MAXLEVELS) { xfs_warn(mp, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, @@ -497,15 +512,14 @@ xfs_iread_extents( * We know that the size is valid (it's checked in iformat_btree) */ ifp->if_bytes = ifp->if_real_bytes = 0; - ifp->if_flags |= XFS_IFEXTENTS; xfs_iext_add(ifp, 0, nextents); error = xfs_bmap_read_extents(tp, ip, whichfork); if (error) { xfs_iext_destroy(ifp); - ifp->if_flags &= ~XFS_IFEXTENTS; return error; } xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); + ifp->if_flags |= XFS_IFEXTENTS; return 0; } /* @@ -775,6 +789,13 @@ xfs_idestroy_fork( } } +/* Count number of incore extents based on if_bytes */ +xfs_extnum_t +xfs_iext_count(struct xfs_ifork *ifp) +{ + return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); +} + /* * Convert in-core extents to on-disk form * @@ -803,7 +824,7 @@ xfs_iextents_copy( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(ifp->if_bytes > 0); - nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nrecs = xfs_iext_count(ifp); XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); ASSERT(nrecs > 0); @@ -941,7 +962,7 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); - ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(idx < xfs_iext_count(ifp)); if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; @@ -1017,7 +1038,7 @@ xfs_iext_add( int new_size; /* size of extents after adding */ xfs_extnum_t nextents; /* number of extents in file */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT((idx >= 0) && (idx <= nextents)); byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); new_size = ifp->if_bytes + byte_diff; @@ -1241,7 +1262,7 @@ xfs_iext_remove( trace_xfs_iext_remove(ip, idx, state, _RET_IP_); ASSERT(ext_diff > 0); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); if (new_size == 0) { @@ -1270,7 +1291,7 @@ xfs_iext_remove_inline( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); ASSERT(idx < XFS_INLINE_EXTS); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(((nextents - ext_diff) > 0) && (nextents - ext_diff) < XFS_INLINE_EXTS); @@ -1309,7 +1330,7 @@ xfs_iext_remove_direct( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); new_size = ifp->if_bytes - (ext_diff * sizeof(xfs_bmbt_rec_t)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (new_size == 0) { xfs_iext_destroy(ifp); @@ -1546,7 +1567,7 @@ xfs_iext_indirect_to_direct( int size; /* size of file extents */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); size = nextents * sizeof(xfs_bmbt_rec_t); @@ -1620,7 +1641,7 @@ xfs_iext_bno_to_ext( xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff = 0; /* start offset of extent */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *idxp = 0; return NULL; @@ -1733,8 +1754,8 @@ xfs_iext_idx_to_irec( ASSERT(ifp->if_flags & XFS_IFEXTIREC); ASSERT(page_idx >= 0); - ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); - ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + ASSERT(page_idx <= xfs_iext_count(ifp)); + ASSERT(page_idx < xfs_iext_count(ifp) || realloc); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; @@ -1782,7 +1803,7 @@ xfs_iext_irec_init( xfs_extnum_t nextents; /* number of extents in file */ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); @@ -1906,7 +1927,7 @@ xfs_iext_irec_compact( ASSERT(ifp->if_flags & XFS_IFEXTIREC); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { xfs_iext_destroy(ifp); @@ -1996,3 +2017,49 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* + * Lookup the extent covering bno. + * + * If there is an extent covering bno return the extent index, and store the + * expanded extent structure in *gotp, and the extent index in *idx. + * If there is no extent covering bno, but there is an extent after it (e.g. + * it lies in a hole) return that extent in *gotp and its index in *idx + * instead. + * If bno is beyond the last extent return false, and return the index after + * the last valid index in *idxp. + */ +bool +xfs_iext_lookup_extent( + struct xfs_inode *ip, + struct xfs_ifork *ifp, + xfs_fileoff_t bno, + xfs_extnum_t *idxp, + struct xfs_bmbt_irec *gotp) +{ + struct xfs_bmbt_rec_host *ep; + + XFS_STATS_INC(ip->i_mount, xs_look_exlist); + + ep = xfs_iext_bno_to_ext(ifp, bno, idxp); + if (!ep) + return false; + xfs_bmbt_get_all(ep, gotp); + return true; +} + +/* + * Return true if there is an extent at index idx, and return the expanded + * extent structure at idx in that case. Else return false. + */ +bool +xfs_iext_get_extent( + struct xfs_ifork *ifp, + xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp) +{ + if (idx < 0 || idx >= xfs_iext_count(ifp)) + return false; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); + return true; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index c9476f50e32d..7fb8365326d1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); struct xfs_bmbt_rec_host * xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); +xfs_extnum_t xfs_iext_count(struct xfs_ifork *); void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, struct xfs_bmbt_irec *, int); void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); @@ -181,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct xfs_ifork *); void xfs_iext_irec_compact_full(struct xfs_ifork *); void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); +bool xfs_iext_lookup_extent(struct xfs_inode *ip, + struct xfs_ifork *ifp, xfs_fileoff_t bno, + xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); +bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp); + extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 083cdd6d6c28..7ae571f8e34a 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -481,8 +481,8 @@ static inline uint xfs_log_dinode_size(int version) typedef struct xfs_buf_log_format { unsigned short blf_type; /* buf log item type indicator */ unsigned short blf_size; /* size of this item */ - ushort blf_flags; /* misc state */ - ushort blf_len; /* number of blocks in this buf */ + unsigned short blf_flags; /* misc state */ + unsigned short blf_len; /* number of blocks in this buf */ __int64_t blf_blkno; /* starting blkno of this buf */ unsigned int blf_map_size; /* used size of data bitmap in words */ unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */ diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 8e385f91d660..29a01ec89dd0 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -42,7 +42,6 @@ typedef struct xlog_recover_item { xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ } xlog_recover_item_t; -struct xlog_tid; typedef struct xlog_recover { struct hlist_node r_list; xlog_tid_t r_log_tid; /* log's transaction id */ @@ -52,7 +51,7 @@ typedef struct xlog_recover { struct list_head r_itemq; /* q for items */ } xlog_recover_t; -#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) +#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr) /* * This is the number of entries in the l_buf_cancel_table used during diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 453bb2757ec2..50add5272807 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -354,6 +354,7 @@ xfs_refcountbt_init_cursor( cur->bc_btnum = XFS_BTNUM_REFC; cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_refcountbt_ops; + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); @@ -408,13 +409,14 @@ xfs_refcountbt_calc_size( */ xfs_extlen_t xfs_refcountbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_refc_mxr[0] == 0) return 0; - return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_refcountbt_calc_size(mp, agblocks); } /* @@ -429,22 +431,24 @@ xfs_refcountbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - *ask += xfs_refcountbt_max_size(mp); error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_buf_relse(agbp); + *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 3be7768bd51a..9db008b955b7 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 83e672ff7577..74e5a54bc428 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -484,6 +484,7 @@ xfs_rmapbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_rmapbt_ops; cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; @@ -549,13 +550,14 @@ xfs_rmapbt_calc_size( */ xfs_extlen_t xfs_rmapbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_rmap_mxr[0] == 0) return 0; - return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_rmapbt_calc_size(mp, agblocks); } /* @@ -570,25 +572,24 @@ xfs_rmapbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; - xfs_extlen_t pool_len; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - /* Reserve 1% of the AG or enough for 1 block per record. */ - pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); - *ask += pool_len; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_buf_relse(agbp); + /* Reserve 1% of the AG or enough for 1 block per record. */ + *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 2a9ac472fb15..19c08e933049 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index e2e1106c9fad..ea45584a9913 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1016,4 +1016,3 @@ xfs_rtfree_extent( } return 0; } - diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a70aec910626..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -242,7 +242,7 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || @@ -262,6 +262,12 @@ xfs_mount_validate_sb( return -EFSCORRUPTED; } + if (xfs_sb_version_hascrc(&mp->m_sb) && + sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { + xfs_notice(mp, "v5 SB sanity check failed"); + return -EFSCORRUPTED; + } + /* * Until this is fixed only page-sized or smaller data blocks work. */ @@ -338,13 +344,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); - if (sbp->sb_qflags & XFS_PQUOTA_ACCT) { + if (sbp->sb_qflags & XFS_PQUOTA_ACCT && + sbp->sb_gquotino != NULLFSINO) { /* * In older version of superblock, on-disk superblock only * has sb_gquotino, and in-core superblock has both sb_gquotino * and sb_pquotino. But, only one of them is supported at any * point of time. So, if PQUOTA is set in disk superblock, - * copy over sb_gquotino to sb_pquotino. + * copy over sb_gquotino to sb_pquotino. The NULLFSINO test + * above is to make sure we don't do this twice and wipe them + * both out! */ sbp->sb_pquotino = sbp->sb_gquotino; sbp->sb_gquotino = NULLFSINO; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 8d74870468c2..717909f2f7b7 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -57,7 +57,6 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ #define NULLAGBLOCK ((xfs_agblock_t)-1) #define NULLAGNUMBER ((xfs_agnumber_t)-1) -#define NULLEXTNUM ((xfs_extnum_t)-1) #define NULLCOMMITLSN ((xfs_lsn_t)-1) @@ -75,11 +74,14 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ * Minimum and maximum blocksize and sectorsize. * The blocksize upper limit is pretty much arbitrary. * The sectorsize upper limit is due to sizeof(sb_sectsize). + * CRC enable filesystems use 512 byte inodes, meaning 512 byte block sizes + * cannot be used. */ #define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ #define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ #define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) #define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1)) #define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ #define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) |