From b9047726386bb538cf5e4f52a9f04eb556eebc67 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 2 Aug 2017 19:51:11 -0700 Subject: ipc: mqueue: Replace timespec with timespec64 struct timespec is not y2038 safe. Replace all uses of timespec by y2038 safe struct timespec64. Even though timespec is used here to represent timeouts, replace these with timespec64 so that it facilitates in verification by creating a y2038 safe kernel image that is free of timespec. The syscall interfaces themselves are not changed as part of the patch. They will be part of a different series. Signed-off-by: Deepa Dinamani Cc: Paul Moore Cc: Richard Guy Briggs Reviewed-by: Richard Guy Briggs Reviewed-by: Arnd Bergmann Acked-by: Paul Moore Signed-off-by: Al Viro --- kernel/auditsc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/auditsc.c') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3260ba2312a9..daee2d5bd03a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1235,11 +1235,11 @@ static void show_special(struct audit_context *context, int *call_panic) case AUDIT_MQ_SENDRECV: audit_log_format(ab, "mqdes=%d msg_len=%zd msg_prio=%u " - "abs_timeout_sec=%ld abs_timeout_nsec=%ld", + "abs_timeout_sec=%lld abs_timeout_nsec=%ld", context->mq_sendrecv.mqdes, context->mq_sendrecv.msg_len, context->mq_sendrecv.msg_prio, - context->mq_sendrecv.abs_timeout.tv_sec, + (long long) context->mq_sendrecv.abs_timeout.tv_sec, context->mq_sendrecv.abs_timeout.tv_nsec); break; case AUDIT_MQ_NOTIFY: @@ -2083,15 +2083,15 @@ void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) * */ void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, - const struct timespec *abs_timeout) + const struct timespec64 *abs_timeout) { struct audit_context *context = current->audit_context; - struct timespec *p = &context->mq_sendrecv.abs_timeout; + struct timespec64 *p = &context->mq_sendrecv.abs_timeout; if (abs_timeout) - memcpy(p, abs_timeout, sizeof(struct timespec)); + memcpy(p, abs_timeout, sizeof(*p)); else - memset(p, 0, sizeof(struct timespec)); + memset(p, 0, sizeof(*p)); context->mq_sendrecv.mqdes = mqdes; context->mq_sendrecv.msg_len = msg_len; -- cgit v1.2.3 From e832bf48c8e12f3b39e40fee35c4ea269d685875 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 4 Jul 2017 13:11:43 +0100 Subject: audit: Reduce overhead using a coarse clock Commit 2115bb250f26 ("audit: Use timespec64 to represent audit timestamps") noted that audit timestamps were not y2038 safe and used a 64-bit timestamp. In itself, this makes sense but the conversion was from CURRENT_TIME to ktime_get_real_ts64() which is a heavier call to record an accurate timestamp which is required in some, but not all, cases. The impact is that when auditd is running without any rules that all syscalls have higher overhead. This is visible in the sysbench-thread benchmark as a 11.5% performance hit. That benchmark is dumb as rocks but it's also visible in redis as an 8-10% hit on all operations which is of greater concern. It is somewhat stupid of audit to track syscalls without any rules related to syscalls but that is how it behaves. The overhead can be directly measured with perf comparing 4.9 with 4.12 4.9 7.76% sysbench [kernel.vmlinux] [k] __schedule 7.62% sysbench [kernel.vmlinux] [k] _raw_spin_lock 7.37% sysbench libpthread-2.22.so [.] __lll_lock_elision 7.29% sysbench [kernel.vmlinux] [.] syscall_return_via_sysret 6.59% sysbench [kernel.vmlinux] [k] native_sched_clock 5.21% sysbench libc-2.22.so [.] __sched_yield 4.38% sysbench [kernel.vmlinux] [k] entry_SYSCALL_64 4.28% sysbench [kernel.vmlinux] [k] do_syscall_64 3.49% sysbench libpthread-2.22.so [.] __lll_unlock_elision 3.13% sysbench [kernel.vmlinux] [k] __audit_syscall_exit 2.87% sysbench [kernel.vmlinux] [k] update_curr 2.73% sysbench [kernel.vmlinux] [k] pick_next_task_fair 2.31% sysbench [kernel.vmlinux] [k] syscall_trace_enter 2.20% sysbench [kernel.vmlinux] [k] __audit_syscall_entry ..... 0.00% swapper [kernel.vmlinux] [k] read_tsc 4.12 7.84% sysbench [kernel.vmlinux] [k] __schedule 7.05% sysbench [kernel.vmlinux] [k] _raw_spin_lock 6.57% sysbench libpthread-2.22.so [.] __lll_lock_elision 6.50% sysbench [kernel.vmlinux] [.] syscall_return_via_sysret 5.95% sysbench [kernel.vmlinux] [k] read_tsc 5.71% sysbench [kernel.vmlinux] [k] native_sched_clock 4.78% sysbench libc-2.22.so [.] __sched_yield 4.30% sysbench [kernel.vmlinux] [k] entry_SYSCALL_64 3.94% sysbench [kernel.vmlinux] [k] do_syscall_64 3.37% sysbench libpthread-2.22.so [.] __lll_unlock_elision 3.32% sysbench [kernel.vmlinux] [k] __audit_syscall_exit 2.91% sysbench [kernel.vmlinux] [k] __getnstimeofday64 Note the additional overhead from read_tsc which goes from 0% to 5.95%. This is on a single-socket E3-1230 but similar overheads have been measured on an older machine which the patch also eliminates. The patch in question has no explanation as to why a fully-accurate timestamp is required and is likely an oversight. Using a coarser, but monotically increasing, timestamp the overhead can be eliminated. While it can be worked around by configuring or disabling audit, it's tricky enough to detect that a kernel fix is justified. With this patch, we see the following; sysbenchthread 4.9.0 4.12.0 4.12.0 vanilla vanilla coarse-v1r1 Amean 1 1.49 ( 0.00%) 1.66 ( -11.42%) 1.51 ( -1.34%) Amean 3 1.48 ( 0.00%) 1.65 ( -11.45%) 1.50 ( -0.96%) Amean 5 1.49 ( 0.00%) 1.67 ( -12.31%) 1.51 ( -1.83%) Amean 7 1.49 ( 0.00%) 1.66 ( -11.72%) 1.50 ( -0.67%) Amean 12 1.48 ( 0.00%) 1.65 ( -11.57%) 1.52 ( -2.89%) Amean 16 1.49 ( 0.00%) 1.65 ( -11.13%) 1.51 ( -1.73%) The benchmark is reporting the time required for different thread counts to lock/unlock a private mutex which, while dense, demonstrates the syscall overhead. This is showing that 4.12 took a 11-12% hit but the overhead is almost eliminated by the patch. While the variance is not reported here, it's well within the noise with the patch applied. Signed-off-by: Mel Gorman Acked-by: Arnd Bergmann Acked-by: Deepa Dinamani Signed-off-by: Paul Moore --- kernel/audit.c | 2 +- kernel/auditsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/auditsc.c') diff --git a/kernel/audit.c b/kernel/audit.c index 6dd556931739..0cf15c77c5eb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1662,7 +1662,7 @@ static inline void audit_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial) { if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { - ktime_get_real_ts64(t); + *t = current_kernel_time64(); *serial = audit_serial(); } } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3260ba2312a9..fd72e11acfb2 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1536,7 +1536,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, return; context->serial = 0; - ktime_get_real_ts64(&context->ctime); + context->ctime = current_kernel_time64(); context->in_syscall = 1; context->current_state = state; context->ppid = 0; -- cgit v1.2.3 From 196a5085592c62ffa4eb739d7ce49c040c2953a1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 7 Aug 2017 21:44:24 +0800 Subject: audit: update the function comments Update the function comments to match the code. Signed-off-by: Geliang Tang Signed-off-by: Paul Moore --- kernel/audit.c | 2 +- kernel/auditsc.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel/auditsc.c') diff --git a/kernel/audit.c b/kernel/audit.c index 0cf15c77c5eb..be1c28fd4d57 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1833,7 +1833,7 @@ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) } /** - * audit_log_hex - convert a buffer to hex and append it to the audit skb + * audit_log_n_hex - convert a buffer to hex and append it to the audit skb * @ab: the audit_buffer * @buf: buffer to convert to hex * @len: length of @buf to be converted diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fd72e11acfb2..aac1a41f82bd 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1462,7 +1462,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } /** - * audit_free - free a per-task audit context + * __audit_free - free a per-task audit context * @tsk: task whose audit context block to free * * Called from copy_process and do_exit @@ -1489,7 +1489,7 @@ void __audit_free(struct task_struct *tsk) } /** - * audit_syscall_entry - fill in an audit record at syscall entry + * __audit_syscall_entry - fill in an audit record at syscall entry * @major: major syscall type (function) * @a1: additional syscall register 1 * @a2: additional syscall register 2 @@ -1543,7 +1543,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, } /** - * audit_syscall_exit - deallocate audit context after a system call + * __audit_syscall_exit - deallocate audit context after a system call * @success: success value of the syscall * @return_code: return value of the syscall * @@ -1705,7 +1705,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context, } /** - * audit_reusename - fill out filename with info from existing entry + * __audit_reusename - fill out filename with info from existing entry * @uptr: userland ptr to pathname * * Search the audit_names list for the current audit context. If there is an @@ -1730,7 +1730,7 @@ __audit_reusename(const __user char *uptr) } /** - * audit_getname - add a name to the list + * __audit_getname - add a name to the list * @name: name to add * * Add a name to the list of audit names for this context. @@ -2135,7 +2135,7 @@ void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) } /** - * audit_ipc_obj - record audit data for ipc object + * __audit_ipc_obj - record audit data for ipc object * @ipcp: ipc permissions * */ @@ -2151,7 +2151,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) } /** - * audit_ipc_set_perm - record audit data for new ipc permissions + * __audit_ipc_set_perm - record audit data for new ipc permissions * @qbytes: msgq bytes * @uid: msgq user id * @gid: msgq group id @@ -2180,7 +2180,7 @@ void __audit_bprm(struct linux_binprm *bprm) /** - * audit_socketcall - record audit data for sys_socketcall + * __audit_socketcall - record audit data for sys_socketcall * @nargs: number of args, which should not be more than AUDITSC_ARGS. * @args: args array * @@ -2211,7 +2211,7 @@ void __audit_fd_pair(int fd1, int fd2) } /** - * audit_sockaddr - record audit data for sys_bind, sys_connect, sys_sendto + * __audit_sockaddr - record audit data for sys_bind, sys_connect, sys_sendto * @len: data length in user space * @a: data address in kernel space * -- cgit v1.2.3