summaryrefslogtreecommitdiff
path: root/fs/dlm/recoverd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm/recoverd.c')
-rw-r--r--fs/dlm/recoverd.c41
1 files changed, 26 insertions, 15 deletions
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index f1a9073c0835..32f9f8926ec3 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
set_bit(LSFL_RUNNING, &ls->ls_flags);
/* unblocks processes waiting to enter the dlm */
up_write(&ls->ls_in_recovery);
+ clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
error = 0;
}
spin_unlock(&ls->ls_recover_lock);
@@ -60,12 +61,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_callback_suspend(ls);
- /*
- * Free non-master tossed rsb's. Master rsb's are kept on toss
- * list and put on root list to be included in resdir recovery.
- */
-
- dlm_clear_toss_list(ls);
+ dlm_clear_toss(ls);
/*
* This list of root rsb's will be the basis of most of the recovery
@@ -84,6 +80,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
goto fail;
}
+ dlm_recover_dir_nodeid(ls);
+
+ ls->ls_recover_dir_sent_res = 0;
+ ls->ls_recover_dir_sent_msg = 0;
ls->ls_recover_locks_in = 0;
dlm_set_recover_status(ls, DLM_RS_NODES);
@@ -115,6 +115,9 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
goto fail;
}
+ log_debug(ls, "dlm_recover_directory %u out %u messages",
+ ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);
+
/*
* We may have outstanding operations that are waiting for a reply from
* a failed node. Mark these to be resent after recovery. Unlock and
@@ -260,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
rv = ls->ls_recover_args;
ls->ls_recover_args = NULL;
if (rv && ls->ls_recover_seq == rv->seq)
- clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+ clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
spin_unlock(&ls->ls_recover_lock);
if (rv) {
@@ -280,26 +283,34 @@ static int dlm_recoverd(void *arg)
return -1;
}
+ down_write(&ls->ls_in_recovery);
+ set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+ wake_up(&ls->ls_recover_lock_wait);
+
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
- if (!test_bit(LSFL_WORK, &ls->ls_flags))
+ if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
+ !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
schedule();
set_current_state(TASK_RUNNING);
- if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
+ if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
+ down_write(&ls->ls_in_recovery);
+ set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+ wake_up(&ls->ls_recover_lock_wait);
+ }
+
+ if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
do_ls_recovery(ls);
}
+ if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
+ up_write(&ls->ls_in_recovery);
+
dlm_put_lockspace(ls);
return 0;
}
-void dlm_recoverd_kick(struct dlm_ls *ls)
-{
- set_bit(LSFL_WORK, &ls->ls_flags);
- wake_up_process(ls->ls_recoverd_task);
-}
-
int dlm_recoverd_start(struct dlm_ls *ls)
{
struct task_struct *p;