summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-04-15 11:19:58 -0700
committerJakub Kicinski <kuba@kernel.org>2024-04-15 11:19:58 -0700
commit77c842caa772044db9f8517a03d33df779f9517c (patch)
treec35d85cdf08a651df1049f026d4e6c4eb2dcd178
parent444cde13826bb4d3f9fdf829bf5e2f7bb03d9c32 (diff)
parent4ba67ef3a1fbb7d8dc5f00de9b93a583d05b38cc (diff)
Merge branch 'net-dqs-optimize-if-stall-threshold-is-not-set'
Breno Leitao says: ==================== net: dqs: optimize if stall threshold is not set Here are four patches aimed at enhancing the Dynamic Queue Limit (DQL) subsystem within the networking stack. The first two commits involve code refactoring, while the third patch introduces the actual change. The fourth patch just improves the cache locality. Typically, when DQL is enabled, stall information is always populated through dql_queue_stall(). However, this information is only necessary if a stall threshold is set, which is stored in struct dql->stall_thrs. Although dql_queue_stall() is relatively inexpensive, it is not entirely free due to memory barriers and similar overheads. To optimize performance, refrain from calling dql_queue_stall() when no stall threshold is set, thus avoiding the processing of unnecessary information. v1: https://lore.kernel.org/all/20240404145939.3601097-1-leitao@debian.org/ ==================== Link: https://lore.kernel.org/r/20240411192241.2498631-1-leitao@debian.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/dynamic_queue_limits.h50
-rw-r--r--lib/dynamic_queue_limits.c13
2 files changed, 39 insertions, 24 deletions
diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 5693a4be0d9a..281298e77a15 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -50,6 +50,9 @@ struct dql {
unsigned int adj_limit; /* limit + num_completed */
unsigned int last_obj_cnt; /* Count at last queuing */
+ /* Stall threshold (in jiffies), defined by user */
+ unsigned short stall_thrs;
+
unsigned long history_head; /* top 58 bits of jiffies */
/* stall entries, a bit per entry */
unsigned long history[DQL_HIST_LEN];
@@ -71,8 +74,6 @@ struct dql {
unsigned int min_limit; /* Minimum limit */
unsigned int slack_hold_time; /* Time to measure slack */
- /* Stall threshold (in jiffies), defined by user */
- unsigned short stall_thrs;
/* Longest stall detected, reported to user */
unsigned short stall_max;
unsigned long last_reap; /* Last reap (in jiffies) */
@@ -83,27 +84,11 @@ struct dql {
#define DQL_MAX_OBJECT (UINT_MAX / 16)
#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT)
-/*
- * Record number of objects queued. Assumes that caller has already checked
- * availability in the queue with dql_avail.
- */
-static inline void dql_queued(struct dql *dql, unsigned int count)
+/* Populate the bitmap to be processed later in dql_check_stall() */
+static inline void dql_queue_stall(struct dql *dql)
{
unsigned long map, now, now_hi, i;
- BUG_ON(count > DQL_MAX_OBJECT);
-
- dql->last_obj_cnt = count;
-
- /* We want to force a write first, so that cpu do not attempt
- * to get cache line containing last_obj_cnt, num_queued, adj_limit
- * in Shared state, but directly does a Request For Ownership
- * It is only a hint, we use barrier() only.
- */
- barrier();
-
- dql->num_queued += count;
-
now = jiffies;
now_hi = now / BITS_PER_LONG;
@@ -133,6 +118,31 @@ static inline void dql_queued(struct dql *dql, unsigned int count)
WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now));
}
+/*
+ * Record number of objects queued. Assumes that caller has already checked
+ * availability in the queue with dql_avail.
+ */
+static inline void dql_queued(struct dql *dql, unsigned int count)
+{
+ if (WARN_ON_ONCE(count > DQL_MAX_OBJECT))
+ return;
+
+ dql->last_obj_cnt = count;
+
+ /* We want to force a write first, so that cpu do not attempt
+ * to get cache line containing last_obj_cnt, num_queued, adj_limit
+ * in Shared state, but directly does a Request For Ownership
+ * It is only a hint, we use barrier() only.
+ */
+ barrier();
+
+ dql->num_queued += count;
+
+ /* Only populate stall information if the threshold is set */
+ if (READ_ONCE(dql->stall_thrs))
+ dql_queue_stall(dql);
+}
+
/* Returns how many objects can be queued, < 0 indicates over limit. */
static inline int dql_avail(const struct dql *dql)
{
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index a1389db1c30a..e49deddd3de9 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -15,12 +15,10 @@
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
-static void dql_check_stall(struct dql *dql)
+static void dql_check_stall(struct dql *dql, unsigned short stall_thrs)
{
- unsigned short stall_thrs;
unsigned long now;
- stall_thrs = READ_ONCE(dql->stall_thrs);
if (!stall_thrs)
return;
@@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count)
{
unsigned int inprogress, prev_inprogress, limit;
unsigned int ovlimit, completed, num_queued;
+ unsigned short stall_thrs;
bool all_prev_completed;
num_queued = READ_ONCE(dql->num_queued);
+ /* Read stall_thrs in advance since it belongs to the same (first)
+ * cache line as ->num_queued. This way, dql_check_stall() does not
+ * need to touch the first cache line again later, reducing the window
+ * of possible false sharing.
+ */
+ stall_thrs = READ_ONCE(dql->stall_thrs);
/* Can't complete more than what's in queue */
BUG_ON(count > num_queued - dql->num_completed);
@@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count)
dql->num_completed = completed;
dql->prev_num_queued = num_queued;
- dql_check_stall(dql);
+ dql_check_stall(dql, stall_thrs);
}
EXPORT_SYMBOL(dql_completed);