diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-02-24 15:56:46 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-02-24 15:56:46 +1100 |
commit | 9489452c104e7a22af68b2f64aeee287b12f91b9 (patch) | |
tree | 6544b69236a53fabf44bb2880951add4206d7a52 | |
parent | 230dcdfff82e9991260b5c1b823231ed1170cf1e (diff) | |
parent | 7cdf056c39947bc539c2c412f60a893ebdc24fac (diff) |
Merge commit 'ocfs2/linux-next'
Conflicts:
Documentation/feature-removal-schedule.txt
-rw-r--r-- | Documentation/ABI/obsolete/o2cb | 11 | ||||
-rw-r--r-- | Documentation/ABI/stable/o2cb | 10 | ||||
-rw-r--r-- | Documentation/feature-removal-schedule.txt | 8 | ||||
-rw-r--r-- | fs/ocfs2/alloc.c | 460 | ||||
-rw-r--r-- | fs/ocfs2/cluster/sys.c | 9 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 51 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/ioctl.c | 12 | ||||
-rw-r--r-- | fs/ocfs2/ioctl.h | 3 | ||||
-rw-r--r-- | fs/sysfs/symlink.c | 9 |
11 files changed, 501 insertions, 78 deletions
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb new file mode 100644 index 000000000000..9c49d8e6c0cc --- /dev/null +++ b/Documentation/ABI/obsolete/o2cb @@ -0,0 +1,11 @@ +What: /sys/o2cb symlink +Date: Dec 2005 +KernelVersion: 2.6.16 +Contact: ocfs2-devel@oss.oracle.com +Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will + be removed when new versions of ocfs2-tools which know to look + in /sys/fs/o2cb are sufficiently prevalent. Don't code new + software to look here, it should try /sys/fs/o2cb instead. + See Documentation/ABI/stable/o2cb for more information on usage. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb new file mode 100644 index 000000000000..5eb1545e0b8d --- /dev/null +++ b/Documentation/ABI/stable/o2cb @@ -0,0 +1,10 @@ +What: /sys/fs/o2cb/ (was /sys/o2cb) +Date: Dec 2005 +KernelVersion: 2.6.16 +Contact: ocfs2-devel@oss.oracle.com +Description: Ocfs2-tools looks at 'interface-revision' for versioning + information. Each logmask/ file controls a set of debug prints + and can be written into with the strings "allow", "deny", or + "off". Reading the file returns the current state. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 7b5bbdff3a77..f74620cf60c1 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -316,3 +316,11 @@ Who: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>, Borislav Petkov <petkovbb@googlemail.com> --------------------------- + +What: /sys/o2cb symlink +When: January 2010 +Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb + exists as a symlink for backwards compatibility for old versions of + ocfs2-tools. 
2 years should be sufficient time to phase in new versions + which know to look in /sys/fs/o2cb. +Who: ocfs2-devel@oss.oracle.com diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 447206eb5c2e..7d81aa6f5672 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1450,6 +1450,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, * - When our insert into the right path leaf is at the leftmost edge * and requires an update of the path immediately to it's left. This * can occur at the end of some types of rotation and appending inserts. + * - When we've adjusted the last extent record in the left path leaf and the + * 1st extent record in the right path leaf during cross extent block merge. */ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, struct ocfs2_path *left_path, @@ -2712,24 +2714,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, } } +static int ocfs2_get_right_path(struct inode *inode, + struct ocfs2_path *left_path, + struct ocfs2_path **ret_right_path) +{ + int ret; + u32 right_cpos; + struct ocfs2_path *right_path = NULL; + struct ocfs2_extent_list *left_el; + + *ret_right_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(left_path->p_tree_depth == 0); + + left_el = path_leaf_el(left_path); + BUG_ON(left_el->l_next_free_rec != left_el->l_count); + + ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, + &right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the rightmost leaf. 
*/ + BUG_ON(right_cpos == 0); + + right_path = ocfs2_new_path(path_root_bh(left_path), + path_root_el(left_path)); + if (!right_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, right_path, right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_right_path = right_path; +out: + if (ret) + ocfs2_free_path(right_path); + return ret; +} + /* * Remove split_rec clusters from the record at index and merge them - * onto the beginning of the record at index + 1. + * onto the beginning of the record "next" to it. + * For index < l_count - 1, the next means the extent rec at index + 1. + * For index == l_count - 1, the "next" means the 1st extent rec of the + * next extent block. */ -static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, - handle_t *handle, - struct ocfs2_extent_rec *split_rec, - struct ocfs2_extent_list *el, int index) +static int ocfs2_merge_rec_right(struct inode *inode, + struct ocfs2_path *left_path, + handle_t *handle, + struct ocfs2_extent_rec *split_rec, + int index) { - int ret; + int ret, next_free, i; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct ocfs2_extent_list *right_el; + struct ocfs2_path *right_path = NULL; + int subtree_index = 0; + struct ocfs2_extent_list *el = path_leaf_el(left_path); + struct buffer_head *bh = path_leaf_bh(left_path); + struct buffer_head *root_bh = NULL; BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); - left_rec = &el->l_recs[index]; - right_rec = &el->l_recs[index + 1]; + + if (index == le16_to_cpu(el->l_next_free_rec - 1) && + le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { + /* we meet with a cross extent block merge. 
*/ + ret = ocfs2_get_right_path(inode, left_path, &right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + right_el = path_leaf_el(right_path); + next_free = le16_to_cpu(right_el->l_next_free_rec); + BUG_ON(next_free <= 0); + right_rec = &right_el->l_recs[0]; + if (ocfs2_is_empty_extent(right_rec)) { + BUG_ON(le16_to_cpu(next_free) <= 1); + right_rec = &right_el->l_recs[1]; + } + + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) != + le32_to_cpu(right_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + } else { + BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1); + right_rec = &el->l_recs[index + 1]; + } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2751,30 +2876,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, if (ret) mlog_errno(ret); + if (right_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); + if (ret) + mlog_errno(ret); + + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } +out: + if (right_path) + ocfs2_free_path(right_path); + return ret; +} + +static int 
ocfs2_get_left_path(struct inode *inode, + struct ocfs2_path *right_path, + struct ocfs2_path **ret_left_path) +{ + int ret; + u32 left_cpos; + struct ocfs2_path *left_path = NULL; + + *ret_left_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(right_path->p_tree_depth == 0); + + ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + right_path, &left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the leftmost leaf. */ + BUG_ON(left_cpos == 0); + + left_path = ocfs2_new_path(path_root_bh(right_path), + path_root_el(right_path)); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, left_path, left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_left_path = left_path; out: + if (ret) + ocfs2_free_path(left_path); return ret; } /* * Remove split_rec clusters from the record at index and merge them - * onto the tail of the record at index - 1. + * onto the tail of the record "before" it. + * For index > 0, the "before" means the extent rec at index - 1. + * + * For index == 0, the "before" means the last record of the previous + * extent block. And there is also a situation that we may need to + * remove the rightmost leaf extent block in the right_path and change + * the right path to indicate the new rightmost path. 
*/ -static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, +static int ocfs2_merge_rec_left(struct inode *inode, + struct ocfs2_path *right_path, handle_t *handle, struct ocfs2_extent_rec *split_rec, - struct ocfs2_extent_list *el, int index) + struct ocfs2_cached_dealloc_ctxt *dealloc, + int index) { - int ret, has_empty_extent = 0; + int ret, i, subtree_index = 0, has_empty_extent = 0; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct ocfs2_extent_list *el = path_leaf_el(right_path); + struct buffer_head *bh = path_leaf_bh(right_path); + struct buffer_head *root_bh = NULL; + struct ocfs2_path *left_path = NULL; + struct ocfs2_extent_list *left_el; - BUG_ON(index <= 0); + BUG_ON(index < 0); - left_rec = &el->l_recs[index - 1]; right_rec = &el->l_recs[index]; - if (ocfs2_is_empty_extent(&el->l_recs[0])) - has_empty_extent = 1; + if (index == 0) { + /* we meet with a cross extent block merge. 
*/ + ret = ocfs2_get_left_path(inode, right_path, &left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + left_el = path_leaf_el(left_path); + BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != + le16_to_cpu(left_el->l_count)); + + left_rec = &left_el->l_recs[ + le16_to_cpu(left_el->l_next_free_rec) - 1]; + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) != + le32_to_cpu(split_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + } else { + left_rec = &el->l_recs[index - 1]; + if (ocfs2_is_empty_extent(&el->l_recs[0])) + has_empty_extent = 1; + } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2790,9 +3041,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, *left_rec = *split_rec; has_empty_extent = 0; - } else { + } else le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); - } le32_add_cpu(&right_rec->e_cpos, split_clusters); le64_add_cpu(&right_rec->e_blkno, @@ -2805,13 +3055,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, if (ret) mlog_errno(ret); + if (left_path) { + ret = ocfs2_journal_dirty(handle, 
path_leaf_bh(left_path)); + if (ret) + mlog_errno(ret); + + /* + * In the situation that the right_rec is empty and the extent + * block is empty also, ocfs2_complete_edge_insert can't handle + * it and we need to delete the right extent block. + */ + if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && + le16_to_cpu(el->l_next_free_rec) == 1) { + + ret = ocfs2_remove_rightmost_path(inode, handle, + right_path, dealloc); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* Now the rightmost extent block has been deleted. + * So we use the new rightmost path. + */ + ocfs2_mv_path(right_path, left_path); + left_path = NULL; + } else + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } out: + if (left_path) + ocfs2_free_path(left_path); return ret; } static int ocfs2_try_to_merge_extent(struct inode *inode, handle_t *handle, - struct ocfs2_path *left_path, + struct ocfs2_path *path, int split_index, struct ocfs2_extent_rec *split_rec, struct ocfs2_cached_dealloc_ctxt *dealloc, @@ -2819,7 +3100,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, { int ret = 0; - struct ocfs2_extent_list *el = path_leaf_el(left_path); + struct ocfs2_extent_list *el = path_leaf_el(path); struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; BUG_ON(ctxt->c_contig_type == CONTIG_NONE); @@ -2832,7 +3113,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * extents - having more than one in a leaf is * illegal. */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); @@ -2847,7 +3128,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Left-right contig implies this. 
*/ BUG_ON(!ctxt->c_split_covers_rec); - BUG_ON(split_index == 0); /* * Since the leftright insert always covers the entire @@ -2858,9 +3138,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Since the adding of an empty extent shifts * everything back to the right, there's no need to * update split_index here. + * + * When the split_index is zero, we need to merge it to the + * previous extent block. It is more efficient and easier + * if we do merge_right first and merge_left later. */ - ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), - handle, split_rec, el, split_index); + ret = ocfs2_merge_rec_right(inode, path, + handle, split_rec, + split_index); if (ret) { mlog_errno(ret); goto out; @@ -2871,32 +3156,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - /* - * The left merge left us with an empty extent, remove - * it. - */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); + /* The merge left us with an empty extent, remove it. */ + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); goto out; } - split_index--; + rec = &el->l_recs[split_index]; /* * Note that we don't pass split_rec here on purpose - * we've merged it into the left side. + * we've merged it into the rec already. 
*/ - ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), - handle, rec, el, split_index); + ret = ocfs2_merge_rec_left(inode, path, + handle, rec, + dealloc, + split_index); + if (ret) { mlog_errno(ret); goto out; } - BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); /* * Error from this last rotate is not critical, so @@ -2915,8 +3198,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ if (ctxt->c_contig_type == CONTIG_RIGHT) { ret = ocfs2_merge_rec_left(inode, - path_leaf_bh(left_path), - handle, split_rec, el, + path, + handle, split_rec, + dealloc, split_index); if (ret) { mlog_errno(ret); @@ -2924,8 +3208,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, } } else { ret = ocfs2_merge_rec_right(inode, - path_leaf_bh(left_path), - handle, split_rec, el, + path, + handle, split_rec, split_index); if (ret) { mlog_errno(ret); @@ -2938,7 +3222,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * The merge may have left an empty extent in * our leaf. Try to rotate it away. 
*/ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) mlog_errno(ret); @@ -3498,20 +3782,57 @@ out: } static enum ocfs2_contig_type -ocfs2_figure_merge_contig_type(struct inode *inode, +ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, struct ocfs2_extent_list *el, int index, struct ocfs2_extent_rec *split_rec) { - struct ocfs2_extent_rec *rec; + int status; enum ocfs2_contig_type ret = CONTIG_NONE; + u32 left_cpos, right_cpos; + struct ocfs2_extent_rec *rec = NULL; + struct ocfs2_extent_list *new_el; + struct ocfs2_path *left_path = NULL, *right_path = NULL; + struct buffer_head *bh; + struct ocfs2_extent_block *eb; + + if (index > 0) { + rec = &el->l_recs[index - 1]; + } else if (path->p_tree_depth > 0) { + status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + path, &left_cpos); + if (status) + goto out; + + if (left_cpos != 0) { + left_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!left_path) + goto out; + + status = ocfs2_find_path(inode, left_path, left_cpos); + if (status) + goto out; + + new_el = path_leaf_el(left_path); + + if (le16_to_cpu(new_el->l_next_free_rec) != + le16_to_cpu(new_el->l_count)) { + bh = path_leaf_bh(left_path); + eb = (struct ocfs2_extent_block *)bh->b_data; + OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, + eb); + goto out; + } + rec = &new_el->l_recs[ + le16_to_cpu(new_el->l_next_free_rec) - 1]; + } + } /* * We're careful to check for an empty extent record here - * the merge code will know what to do if it sees one. 
*/ - - if (index > 0) { - rec = &el->l_recs[index - 1]; + if (rec) { if (index == 1 && ocfs2_is_empty_extent(rec)) { if (split_rec->e_cpos == el->l_recs[index].e_cpos) ret = CONTIG_RIGHT; @@ -3520,10 +3841,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode, } } - if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { + rec = NULL; + if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) + rec = &el->l_recs[index + 1]; + else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && + path->p_tree_depth > 0) { + status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, + path, &right_cpos); + if (status) + goto out; + + if (right_cpos == 0) + goto out; + + right_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!right_path) + goto out; + + status = ocfs2_find_path(inode, right_path, right_cpos); + if (status) + goto out; + + new_el = path_leaf_el(right_path); + rec = &new_el->l_recs[0]; + if (ocfs2_is_empty_extent(rec)) { + if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { + bh = path_leaf_bh(right_path); + eb = (struct ocfs2_extent_block *)bh->b_data; + OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, + eb); + goto out; + } + rec = &new_el->l_recs[1]; + } + } + + if (rec) { enum ocfs2_contig_type contig_type; - rec = &el->l_recs[index + 1]; contig_type = ocfs2_extent_contig(inode, rec, split_rec); if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) @@ -3532,6 +3888,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, ret = contig_type; } +out: + if (left_path) + ocfs2_free_path(left_path); + if (right_path) + ocfs2_free_path(right_path); + return ret; } @@ -3994,7 +4356,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, goto out; } - ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, + ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, split_index, split_rec); diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 0c095ce7723d..98429fd68499 100644 --- a/fs/ocfs2/cluster/sys.c 
+++ b/fs/ocfs2/cluster/sys.c @@ -57,6 +57,7 @@ static struct kset *o2cb_kset; void o2cb_sys_shutdown(void) { mlog_sys_shutdown(); + sysfs_remove_link(NULL, "o2cb"); kset_unregister(o2cb_kset); } @@ -68,6 +69,14 @@ int o2cb_sys_init(void) if (!o2cb_kset) return -ENOMEM; + /* + * Create this symlink for backwards compatibility with old + * versions of ocfs2-tools which look for things in /sys/o2cb. + */ + ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); + if (ret) + goto error; + ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); if (ret) goto error; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ee50c9610e7f..d808bfae6138 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -399,8 +399,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); - /* we won't reconnect after our valid conn goes away for - * this hb iteration.. here so it shows up in the logs */ if (was_valid && !valid && err == 0) err = -ENOTCONN; @@ -430,11 +428,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, if (!was_valid && valid) { o2quo_conn_up(o2net_num_from_nn(nn)); - /* this is a bit of a hack. we only try reconnecting - * when heartbeating starts until we get a connection. - * if that connection then dies we don't try reconnecting. - * the only way to start connecting again is to down - * heartbeat and bring it back up. */ cancel_delayed_work(&nn->nn_connect_expired); printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", o2nm_this_node() > sc->sc_node->nd_num ? @@ -457,6 +450,18 @@ static void o2net_set_nn_state(struct o2net_node *nn, delay = 0; mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); + + /* + * Delay the expired work after idle timeout. 
+ * + * We might have lots of failed connection attempts that run + * through here but we only cancel the connect_expired work when + * a connection attempt succeeds. So only the first enqueue of + * the connect_expired work will do anything. The rest will see + * that it's already queued and do nothing. + */ + delay += msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)); + queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); } /* keep track of the nn's sc ref for the caller */ @@ -1193,6 +1198,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) * shut down already */ if (nn->nn_sc == sc) { o2net_sc_reset_idle_timer(sc); + atomic_set(&nn->nn_timeout, 0); o2net_set_nn_state(nn, sc, 1, 0); } spin_unlock(&nn->nn_lock); @@ -1391,6 +1397,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work) static void o2net_idle_timer(unsigned long data) { struct o2net_sock_container *sc = (struct o2net_sock_container *)data; + struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); struct timeval now; do_gettimeofday(&now); @@ -1413,6 +1420,12 @@ static void o2net_idle_timer(unsigned long data) sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); + /* + * Initialize the nn_timeout so that the next connection attempt + * will continue in o2net_start_connect. 
+ */ + atomic_set(&nn->nn_timeout, 1); + o2net_sc_queue_work(sc, &sc->sc_shutdown_work); } @@ -1447,6 +1460,7 @@ static void o2net_start_connect(struct work_struct *work) struct socket *sock = NULL; struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; int ret = 0, stop; + unsigned int timeout; /* if we're greater we initiate tx, otherwise we accept */ if (o2nm_this_node() <= o2net_num_from_nn(nn)) @@ -1466,8 +1480,17 @@ static void o2net_start_connect(struct work_struct *work) } spin_lock(&nn->nn_lock); - /* see if we already have one pending or have given up */ - stop = (nn->nn_sc || nn->nn_persistent_error); + /* + * see if we already have one pending or have given up. + * For nn_timeout, it is set when we close the connection + * because of the idle time out. So it means that we have + * at least connected to that node successfully once, + * now try to connect to it again. + */ + timeout = atomic_read(&nn->nn_timeout); + stop = (nn->nn_sc || + (nn->nn_persistent_error && + (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); spin_unlock(&nn->nn_lock); if (stop) goto out; @@ -1580,6 +1603,7 @@ void o2net_disconnect_node(struct o2nm_node *node) /* don't reconnect until it's heartbeating again */ spin_lock(&nn->nn_lock); + atomic_set(&nn->nn_timeout, 0); o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); spin_unlock(&nn->nn_lock); @@ -1614,17 +1638,12 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); if (node_num != o2nm_this_node()) { - /* heartbeat doesn't work unless a local node number is - * configured and doing so brings up the o2net_wq, so we can - * use it.. */ - queue_delayed_work(o2net_wq, &nn->nn_connect_expired, - msecs_to_jiffies(o2net_idle_timeout(node))); - /* believe it or not, accept and node hearbeating testing * can succeed for this node before we got here.. 
so * only use set_nn_state to clear the persistent error * if that hasn't already happened */ spin_lock(&nn->nn_lock); + atomic_set(&nn->nn_timeout, 0); if (nn->nn_persistent_error) o2net_set_nn_state(nn, NULL, 0, 0); spin_unlock(&nn->nn_lock); @@ -1748,6 +1767,7 @@ static int o2net_accept_one(struct socket *sock) new_sock = NULL; spin_lock(&nn->nn_lock); + atomic_set(&nn->nn_timeout, 0); o2net_set_nn_state(nn, sc, 0, 0); spin_unlock(&nn->nn_lock); @@ -1942,6 +1962,7 @@ int o2net_init(void) for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { struct o2net_node *nn = o2net_nn_from_num(i); + atomic_set(&nn->nn_timeout, 0); spin_lock_init(&nn->nn_lock); INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); INIT_DELAYED_WORK(&nn->nn_connect_expired, diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index d25b9af28500..b4c5586f46ea 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -95,6 +95,8 @@ struct o2net_node { unsigned nn_sc_valid:1; /* if this is set tx just returns it */ int nn_persistent_error; + /* It is only set to 1 after the idle time out. */ + atomic_t nn_timeout; /* threads waiting for an sc to arrive wait on the wq for generation * to increase. 
it is increased when a connecting socket succeeds diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ed5d5232e85d..9154c82d3258 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = { .open = ocfs2_file_open, .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write, - .ioctl = ocfs2_ioctl, + .unlocked_ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif @@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = { .fsync = ocfs2_sync_file, .release = ocfs2_dir_release, .open = ocfs2_dir_open, - .ioctl = ocfs2_ioctl, + .unlocked_ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 5177fba5162b..16e389742762 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -112,9 +112,9 @@ bail: return status; } -int ocfs2_ioctl(struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long arg) +long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_path.dentry->d_inode; unsigned int flags; int new_clusters; int status; @@ -168,9 +168,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, #ifdef CONFIG_COMPAT long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - switch (cmd) { case OCFS2_IOC32_GETFLAGS: cmd = OCFS2_IOC_GETFLAGS; @@ -190,9 +187,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) return -ENOIOCTLCMD; } - lock_kernel(); - ret = ocfs2_ioctl(inode, file, cmd, arg); - unlock_kernel(); - return ret; + return ocfs2_ioctl(file, cmd, arg); } #endif diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h index 4d6c4f430d0d..cf9a5ee30fef 100644 --- a/fs/ocfs2/ioctl.h +++ b/fs/ocfs2/ioctl.h @@ -10,8 +10,7 @@ #ifndef OCFS2_IOCTL_H #define OCFS2_IOCTL_H -int ocfs2_ioctl(struct inode * inode, struct 
file * filp, - unsigned int cmd, unsigned long arg); +long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); #endif /* OCFS2_IOCTL_H */ diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 5f66c4466151..817f5966edca 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char void sysfs_remove_link(struct kobject * kobj, const char * name) { - sysfs_hash_and_remove(kobj->sd, name); + struct sysfs_dirent *parent_sd = NULL; + + if (!kobj) + parent_sd = &sysfs_root; + else + parent_sd = kobj->sd; + + sysfs_hash_and_remove(parent_sd, name); } static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, |