summaryrefslogtreecommitdiff
path: root/include/linux/netdevice.h
diff options
context:
space:
mode:
authorBjörn Töpel <bjorn.topel@intel.com>2020-11-30 19:51:56 +0100
committerDaniel Borkmann <daniel@iogearbox.net>2020-12-01 00:09:25 +0100
commit7fd3253a7de6a317a0683f83739479fb880bffc8 (patch)
tree8490a9756c6a4a7f5078fd856f23b568703e3680 /include/linux/netdevice.h
parent854055c0cf30d732b3514ce7956976f60496b1a1 (diff)
net: Introduce preferred busy-polling
The existing busy-polling mode, enabled by the SO_BUSY_POLL socket option or system-wide using the /proc/sys/net/core/busy_read knob, is an opportunistic. That means that if the NAPI context is not scheduled, it will poll it. If, after busy-polling, the budget is exceeded the busy-polling logic will schedule the NAPI onto the regular softirq handling. One implication of the behavior above is that a busy/heavy loaded NAPI context will never enter/allow for busy-polling. Some applications prefer that most NAPI processing would be done by busy-polling. This series adds a new socket option, SO_PREFER_BUSY_POLL, that works in concert with the napi_defer_hard_irqs and gro_flush_timeout knobs. The napi_defer_hard_irqs and gro_flush_timeout knobs were introduced in commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature"), and allows for a user to defer interrupts to be enabled and instead schedule the NAPI context from a watchdog timer. When a user enables the SO_PREFER_BUSY_POLL, again with the other knobs enabled, and the NAPI context is being processed by a softirq, the softirq NAPI processing will exit early to allow the busy-polling to be performed. If the application stops performing busy-polling via a system call, the watchdog timer defined by gro_flush_timeout will timeout, and regular softirq handling will resume. In summary; Heavy traffic applications that prefer busy-polling over softirq processing should use this option. Example usage: $ echo 2 | sudo tee /sys/class/net/ens785f1/napi_defer_hard_irqs $ echo 200000 | sudo tee /sys/class/net/ens785f1/gro_flush_timeout Note that the timeout should be larger than the userspace processing window, otherwise the watchdog will timeout and fall back to regular softirq processing. Enable the SO_BUSY_POLL/SO_PREFER_BUSY_POLL options on your socket. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Jakub Kicinski <kuba@kernel.org> Link: https://lore.kernel.org/bpf/20201130185205.196029-2-bjorn.topel@gmail.com
Diffstat (limited to 'include/linux/netdevice.h')
-rw-r--r--include/linux/netdevice.h35
1 files changed, 21 insertions, 14 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ce648a564f7..52d1cc2bd8a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -350,23 +350,25 @@ struct napi_struct {
};
enum {
- NAPI_STATE_SCHED, /* Poll is scheduled */
- NAPI_STATE_MISSED, /* reschedule a napi */
- NAPI_STATE_DISABLE, /* Disable pending */
- NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
- NAPI_STATE_LISTED, /* NAPI added to system lists */
- NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
- NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+ NAPI_STATE_SCHED, /* Poll is scheduled */
+ NAPI_STATE_MISSED, /* reschedule a napi */
+ NAPI_STATE_DISABLE, /* Disable pending */
+ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */
+ NAPI_STATE_LISTED, /* NAPI added to system lists */
+ NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */
+ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
+ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
};
enum {
- NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED),
- NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
- NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
- NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
- NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
- NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
- NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED),
+ NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED),
+ NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE),
+ NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC),
+ NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
+ NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
+ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
};
enum gro_result {
@@ -437,6 +439,11 @@ static inline bool napi_disable_pending(struct napi_struct *n)
return test_bit(NAPI_STATE_DISABLE, &n->state);
}
+static inline bool napi_prefer_busy_poll(struct napi_struct *n)
+{
+ return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
+}
+
bool napi_schedule_prep(struct napi_struct *n);
/**