summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_unlink_recover.c
blob: 7635a9e9f03fe0404726426cec85fe2326e0ee4d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_trans_priv.h"
#include "xfs_ialloc.h"
#include "xfs_icache.h"
#include "xfs_pwork.h"

/*
 * This routine performs a transaction to null out a bad inode pointer
 * in an agi unlinked inode hash bucket.
 *
 * Called when an inode on the bucket list cannot be read: the bucket head
 * is overwritten with NULLAGINO so recovery can continue, at the cost of
 * leaking whatever inodes remained on the chain.  Failure here is only
 * warned about, never propagated.
 */
STATIC void
xlog_recover_clear_agi_bucket(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	int			bucket)
{
	struct xfs_trans	*tp;
	struct xfs_agi		*agi;
	struct xfs_buf		*agibp;
	int			offset;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
	if (error)
		goto out_error;

	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		goto out_abort;

	/* Terminate the bucket chain by writing NULLAGINO over its head. */
	agi = agibp->b_addr;
	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		 (sizeof(xfs_agino_t) * bucket);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));

	error = xfs_trans_commit(tp);
	if (error)
		goto out_error;
	return;

out_abort:
	xfs_trans_cancel(tp);
out_error:
	/* xfs_agnumber_t is unsigned 32-bit, so print it with %u, not %d. */
	xfs_warn(mp, "%s: failed to clear agi %u. Continuing.", __func__, agno);
}

/*
 * Pull one inode off an AGI unlinked bucket chain during recovery.
 *
 * Looks up the in-core inode, reads its on-disk copy to learn the next
 * chain entry, then drops the reference (which triggers the actual
 * inactivation/freeing of the unlinked inode).
 *
 * Returns the AG inode number of the next entry in the bucket chain, or
 * NULLAGINO when the chain ends here or the inode could not be read (in
 * which case the whole bucket is cleared and abandoned).
 */
STATIC xfs_agino_t
xlog_recover_process_one_iunlink(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno,
	xfs_agino_t			agino,
	int				bucket)
{
	struct xfs_dinode		*dino;
	struct xfs_inode		*ip;
	struct xfs_buf			*bp;
	xfs_agino_t			next_agino;
	int				error;

	error = xfs_iget(mp, NULL, XFS_AGINO_TO_INO(mp, agno, agino),
			0, 0, &ip);
	if (error)
		goto fail;

	/*
	 * Map and read the on-disk inode to find the next entry in the
	 * bucket chain.
	 */
	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dino, &bp, 0);
	if (error)
		goto fail_iput;

	xfs_iflags_clear(ip, XFS_IRECOVERY);
	ASSERT(VFS_I(ip)->i_nlink == 0);
	ASSERT(VFS_I(ip)->i_mode != 0);

	/* Grab the continuation point before releasing the inode buffer. */
	next_agino = be32_to_cpu(dino->di_next_unlinked);
	xfs_buf_relse(bp);

	/*
	 * Make sure no DMAPI event fires when the last reference on this
	 * inode goes away.
	 */
	ip->i_d.di_dmevmask = 0;

	xfs_irele(ip);
	return next_agino;

 fail_iput:
	xfs_irele(ip);
 fail:
	/*
	 * Either the inode backing this bucket entry is unreadable or its
	 * contents are garbage.  Ditch the whole bucket: some inodes and
	 * space are lost, but at least recovery will not hang on it.
	 *
	 * xlog_recover_clear_agi_bucket() runs a transaction that clears
	 * the inode pointer in the bucket.
	 */
	xlog_recover_clear_agi_bucket(mp, agno, bucket);
	return NULLAGINO;
}

/*
 * Recover the AGI unlinked lists of one allocation group.
 *
 * Called during recovery to deal with inodes that were unlinked but not
 * yet freed when the system went down; such inodes are still chained off
 * the AGI hash buckets.  Every inode found on a bucket list is fully
 * truncated and freed, and its removal from the list happens atomically
 * with the free.
 *
 * When everything this loop touches is already cached, it can monopolise
 * the CPU for a long time: no lock contention, no allocation stalls, no
 * IO waits, so it keeps going until it exhausts the inodes, memory, or
 * log space.  On single-CPU or non-preemptible kernels that latency can
 * starve other filesystem work (e.g. CIL pushes) and even deadlock if
 * recovery runs out of log reservation.  Hence the explicit
 * cond_resched() to yield whenever other kernel work is pending.
 */
STATIC int
xlog_recover_process_ag_iunlinked(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_buf		*bp;
	struct xfs_agi		*agi;
	xfs_agino_t		agino;
	int			bucket;
	int			error;

	/* Read this AG's AGI; without it there is nothing to process. */
	error = xfs_read_agi(mp, NULL, agno, &bp);
	if (error) {
		/*
		 * AGI is b0rked. Don't process it.
		 *
		 * We should probably mark the filesystem as corrupt
		 * after we've recovered all the ag's we can....
		 */
		return error;
	}

	/*
	 * Drop the buffer lock so the truncate/free transactions can take
	 * it in the normal way.  Nobody races with us for this AGI here,
	 * so reading the initial bucket heads unlocked is fine; the
	 * reference we keep just pins the buffer in memory for the
	 * duration.
	 */
	agi = bp->b_addr;
	xfs_buf_unlock(bp);

	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
		agino = be32_to_cpu(agi->agi_unlinked[bucket]);
		while (agino != NULLAGINO) {
			agino = xlog_recover_process_one_iunlink(mp,
						agno, agino, bucket);
			cond_resched();
		}
	}
	xfs_buf_rele(bp);

	return 0;
}

/*
 * Work item describing one AG whose unlinked lists need recovery; one is
 * allocated per AG and queued to the pwork machinery, and it is freed by
 * the worker after processing.
 */
struct xlog_recover_unlinked {
	struct xfs_pwork	pwork;	/* embedded parallel-work item */
	xfs_agnumber_t		agno;	/* AG whose unlinked lists to process */
};

static int
xlog_recover_process_unlinked_ag(
	struct xfs_mount		*mp,
	struct xfs_pwork		*pwork)
{
	struct xlog_recover_unlinked	*ru;
	int				error = 0;

	ru = container_of(pwork, struct xlog_recover_unlinked, pwork);
	if (xfs_pwork_want_abort(pwork))
		goto out;

	error = xlog_recover_process_ag_iunlinked(mp, ru->agno);
out:
	kmem_free(ru);
	return error;
}

/*
 * Kick off recovery of every AG's unlinked inode lists in parallel.
 *
 * One work item is allocated and queued per AG; the pwork workers call
 * xlog_recover_process_unlinked_ag() for each and free the items.
 * Returns the first error reported by the worker pool, or zero.
 */
int
xlog_recover_process_unlinked(
	struct xlog		*log)
{
	struct xfs_mount	*mp = log->l_mp;
	struct xfs_pwork_ctl	pctl;
	struct xlog_recover_unlinked *work;
	unsigned int		nr_threads;
	xfs_agnumber_t		agno;
	int			error;

	/* Size the worker pool to the data device's parallelism. */
	nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
	error = xfs_pwork_init(mp, &pctl, xlog_recover_process_unlinked_ag,
			"xlog_recover", nr_threads);
	if (error)
		return error;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		if (xfs_pwork_ctl_want_abort(&pctl))
			break;

		work = kmem_zalloc(sizeof(*work), 0);
		work->agno = agno;
		xfs_pwork_queue(&pctl, &work->pwork);
	}

	/* Wait for all queued work and collect any worker error. */
	return xfs_pwork_destroy(&pctl);
}