summary refs log tree commit diff
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org>, 2022-07-14 11:06:02 -0700
committer: Darrick J. Wong <djwong@kernel.org>, 2022-11-09 19:07:25 -0800
commit: 60cef46bccc678235a132d2c1ece93ad8cb1a5aa (patch)
tree: 8b14757a814ba3a1db680eea0c4647074f121a82
parent: bd31e01a194a7a2bc34b010d114201d35121f09b (diff)
xfs: cache pages used for xfarray quicksort convergence
After quicksort picks a pivot item for a particular subsort, it walks the records in that subset from the outside in, rearranging them so that every record less than the pivot comes before it, and every record greater than the pivot comes after it. This scan has a lot of locality, so we can speed it up quite a bit by grabbing the xfile backing page and holding onto it as long as we possibly can. Doing so reduces the runtime by another 5% on the author's computer.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/scrub/xfarray.c 85
1 files changed, 75 insertions, 10 deletions
diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index 1238971eaf86..bed1d2c760c8 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c
@@ -770,6 +770,65 @@ xfarray_qsort_push(
}
/*
+ * Load array element @idx into @ptr.  If the element fits in one page, keep
+ * that xfile page cached in @si afterwards so nearby loads can reuse it;
+ * callers drop it with xfarray_sort_put_page().  Returns 0 or an error.
+ */
+static inline int
+xfarray_sort_load_cached(
+	struct xfarray_sortinfo	*si,
+	xfarray_idx_t		idx,
+	void			*ptr)
+{
+	loff_t			idx_pos = xfarray_pos(si->array, idx);
+	pgoff_t			startpage = idx_pos >> PAGE_SHIFT;
+	pgoff_t			endpage;
+	int			error = 0;
+
+	/*
+	 * If this load would split a page, release the cached page, if any,
+	 * and perform a traditional read.
+	 */
+	endpage = (idx_pos + si->array->obj_size - 1) >> PAGE_SHIFT;
+	if (startpage != endpage) {
+		error = xfarray_sort_put_page(si);
+		if (error)
+			return error;
+
+		if (xfarray_sort_terminated(si, &error))
+			return error;
+
+		return xfile_obj_load(si->array->xfile, ptr,
+				si->array->obj_size, idx_pos);
+	}
+
+	/* If the cached page is not the one we want, release it. */
+	if (si->page && si->page->index != startpage) {
+		error = xfarray_sort_put_page(si);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * No cached page, and the load fits in one page: grab it.  Widen to
+	 * loff_t before shifting; pgoff_t is only 32 bits on 32-bit kernels.
+	 */
+	if (!si->page) {
+		if (xfarray_sort_terminated(si, &error))
+			return error;
+
+		error = xfarray_sort_get_page(si,
+				(loff_t)startpage << PAGE_SHIFT, PAGE_SIZE);
+		if (error)
+			return error;
+	}
+
+	memcpy(ptr, si->page_kaddr + offset_in_page(idx_pos),
+			si->array->obj_size);
+	return 0;
+}
+
+/*
* Sort the array elements via quicksort. This implementation incorporates
* four optimizations discussed in Sedgewick:
*
@@ -794,6 +853,10 @@ xfarray_qsort_push(
* If a small set is contained entirely within a single xfile memory page,
* map the page directly and run heap sort directly on the xfile page
* instead of using the load/store interface. This halves the runtime.
+ *
+ * 5. This optimization is specific to the implementation. When converging lo
+ * and hi after selecting a pivot, we will try to retain the xfile memory
+ * page between load calls, which reduces run time by 50%.
*/
/*
@@ -875,19 +938,20 @@ xfarray_sort(
* Decrement hi until it finds an a[hi] less than the
* pivot value.
*/
- error = xfarray_sort_load(si, hi, scratch);
+ error = xfarray_sort_load_cached(si, hi, scratch);
if (error)
goto out_free;
while (xfarray_sort_cmp(si, scratch, pivot) >= 0 &&
lo < hi) {
- if (xfarray_sort_terminated(si, &error))
- goto out_free;
-
hi--;
- error = xfarray_sort_load(si, hi, scratch);
+ error = xfarray_sort_load_cached(si, hi,
+ scratch);
if (error)
goto out_free;
}
+ error = xfarray_sort_put_page(si);
+ if (error)
+ goto out_free;
if (xfarray_sort_terminated(si, &error))
goto out_free;
@@ -903,19 +967,20 @@ xfarray_sort(
* Increment lo until it finds an a[lo] greater than
* the pivot value.
*/
- error = xfarray_sort_load(si, lo, scratch);
+ error = xfarray_sort_load_cached(si, lo, scratch);
if (error)
goto out_free;
while (xfarray_sort_cmp(si, scratch, pivot) <= 0 &&
lo < hi) {
- if (xfarray_sort_terminated(si, &error))
- goto out_free;
-
lo++;
- error = xfarray_sort_load(si, lo, scratch);
+ error = xfarray_sort_load_cached(si, lo,
+ scratch);
if (error)
goto out_free;
}
+ error = xfarray_sort_put_page(si);
+ if (error)
+ goto out_free;
if (xfarray_sort_terminated(si, &error))
goto out_free;