Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/btf.c                       1
-rw-r--r--  kernel/bpf/core.c                      2
-rw-r--r--  kernel/bpf/verifier.c                 11
-rw-r--r--  kernel/compat.c                        2
-rw-r--r--  kernel/entry/common.c                  5
-rw-r--r--  kernel/events/core.c                   6
-rw-r--r--  kernel/fork.c                          7
-rw-r--r--  kernel/kcsan/Makefile                  2
-rw-r--r--  kernel/sched/core.c                    7
-rw-r--r--  kernel/sched/fair.c                   55
-rw-r--r--  kernel/trace/ftrace.c                  5
-rw-r--r--  kernel/trace/kprobe_event_gen_test.c   4
-rw-r--r--  kernel/trace/ring_buffer.c             4
-rw-r--r--  kernel/trace/trace.c                   2
-rw-r--r--  kernel/trace/trace_events_hist.c      12
-rw-r--r--  kernel/trace/trace_hwlat.c            11
-rw-r--r--  kernel/trace/trace_osnoise.c          10
-rw-r--r--  kernel/watch_queue.c                   1
18 files changed, 101 insertions, 46 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index fa22ec79ac0e..73780748404c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4569,6 +4569,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env,
struct btf *btf = env->btf;
u16 i;
+ env->resolve_mode = RESOLVE_TBD;
for_each_vsi_from(i, v->next_member, v->t, vsi) {
u32 var_type_id = vsi->type, type_id, type_size = 0;
const struct btf_type *var_type = btf_type_by_id(env->btf,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b297e9f60ca1..e2d256c82072 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -972,7 +972,7 @@ static int __init bpf_jit_charge_init(void)
{
/* Only used as heuristic here to derive limit. */
bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
- bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
+ bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1,
PAGE_SIZE), LONG_MAX);
return 0;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 272563a0b770..d517d13878cf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3826,6 +3826,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_MISC)
continue;
+ if (type == STACK_INVALID && env->allow_uninit_stack)
+ continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -3863,6 +3865,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_ZERO)
continue;
+ if (type == STACK_INVALID && env->allow_uninit_stack)
+ continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -5754,7 +5758,8 @@ static int check_stack_range_initialized(
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC)
goto mark;
- if (*stype == STACK_ZERO) {
+ if ((*stype == STACK_ZERO) ||
+ (*stype == STACK_INVALID && env->allow_uninit_stack)) {
if (clobber) {
/* helper can write anything into the stack */
*stype = STACK_MISC;
@@ -13936,6 +13941,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
+ if (env->allow_uninit_stack &&
+ old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
+ continue;
+
/* explored stack has more populated slots than current stack
* and these slots were used
*/
diff --git a/kernel/compat.c b/kernel/compat.c
index 55551989d9da..fb50f29d9b36 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -152,7 +152,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len,
if (len & (sizeof(compat_ulong_t)-1))
return -EINVAL;
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 846add8394c4..be61332c66b5 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -21,7 +21,7 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
arch_enter_from_user_mode(regs);
lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CONTEXT_USER);
+ CT_WARN_ON(__ct_state() != CONTEXT_USER);
user_exit_irqoff();
instrumentation_begin();
@@ -192,13 +192,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
- unsigned long ti_work = read_thread_flags();
+ unsigned long ti_work;
lockdep_assert_irqs_disabled();
/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();
+ ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
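A minimal userspace sketch of the ordering issue fixed above, assuming (as the patch implies) that tick_nohz_user_enter_prepare() can raise new work bits after an early sample of the thread flags; the names below are stand-ins, not the kernel API:

#include <stdio.h>

static unsigned long thread_flags;

/* Stand-in for tick_nohz_user_enter_prepare(), which may raise new work. */
static void tick_prepare(void)
{
	thread_flags |= 0x1;	/* e.g. a pending wakeup turns into work */
}

int main(void)
{
	unsigned long early = thread_flags;	/* old code: sampled too early */

	tick_prepare();

	unsigned long late = thread_flags;	/* patched code: sampled after */

	printf("early sample sees work: %d, late sample sees work: %d\n",
	       !!(early & 0x1), !!(late & 0x1));
	return 0;
}

The early sample misses the work bit and the exit loop would never run for it; sampling after the nohz preparation closes that window.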
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f79fd8b87f75..fb3e436bcd4a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2163,7 +2163,7 @@ static void perf_group_detach(struct perf_event *event)
/* Inherit group flags from the previous leader */
sibling->group_caps = event->group_caps;
- if (!RB_EMPTY_NODE(&event->group_node)) {
+ if (sibling->attach_state & PERF_ATTACH_CONTEXT) {
add_event_to_groups(sibling, event->ctx);
if (sibling->state == PERF_EVENT_STATE_ACTIVE)
@@ -3872,7 +3872,7 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
if (likely(!ctx->nr_events))
return;
- if (is_active ^ EVENT_TIME) {
+ if (!(is_active & EVENT_TIME)) {
/* start ctx time */
__update_context_time(ctx, false);
perf_cgroup_set_timestamp(cpuctx);
@@ -9187,7 +9187,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
perf_event_header__init_id(&bpf_event->event_id.header,
&sample, event);
- ret = perf_output_begin(&handle, data, event,
+ ret = perf_output_begin(&handle, &sample, event,
bpf_event->event_id.header.size);
if (ret)
return;
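A small sketch of the ctx_sched_in() condition fix, using hypothetical flag values for illustration (the kernel's enum event_type_t may differ): the XOR form fires whenever is_active differs from exactly EVENT_TIME, even when the time bit is already set, whereas the corrected form tests only that bit:

#include <stdio.h>

#define EVENT_FLEXIBLE	0x1	/* hypothetical values for illustration */
#define EVENT_TIME	0x4

int main(void)
{
	int is_active = EVENT_TIME | EVENT_FLEXIBLE;	/* ctx time already running */

	/* Old test: nonzero here, so ctx time would be (re)started. */
	printf("old test starts time: %d\n", !!(is_active ^ EVENT_TIME));

	/* New test: zero, because the EVENT_TIME bit is already set. */
	printf("new test starts time: %d\n", !(is_active & EVENT_TIME));
	return 0;
}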
diff --git a/kernel/fork.c b/kernel/fork.c
index f68954d05e89..c0257cbee093 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -755,11 +755,6 @@ static void check_mm(struct mm_struct *mm)
for (i = 0; i < NR_MM_COUNTERS; i++) {
long x = percpu_counter_sum(&mm->rss_stat[i]);
- if (likely(!x))
- continue;
-
- /* Making sure this is not due to race with CPU offlining. */
- x = percpu_counter_sum_all(&mm->rss_stat[i]);
if (unlikely(x))
pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
mm, resident_page_types[i], x);
@@ -2936,7 +2931,7 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
* - make the CLONE_DETACHED bit reusable for clone3
* - make the CSIGNAL bits reusable for clone3
*/
- if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
+ if (kargs->flags & (CLONE_DETACHED | (CSIGNAL & (~CLONE_NEWTIME))))
return false;
if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
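The flag values involved come from include/uapi/linux/sched.h: CLONE_NEWTIME (0x00000080) was allocated inside the legacy CSIGNAL byte (0x000000ff), so a sketch of the old versus new validity check shows why clone3() used to reject time namespaces:

#include <stdio.h>

#define CSIGNAL		0x000000ff	/* legacy signal mask in clone() flags */
#define CLONE_DETACHED	0x00400000
#define CLONE_NEWTIME	0x00000080	/* overlaps the CSIGNAL byte */

int main(void)
{
	unsigned long long flags = CLONE_NEWTIME;

	/* Old check: CLONE_NEWTIME lies inside CSIGNAL, so it is rejected. */
	printf("old check rejects: %d\n",
	       !!(flags & (CLONE_DETACHED | CSIGNAL)));

	/* New check: CLONE_NEWTIME is carved out of CSIGNAL and accepted. */
	printf("new check rejects: %d\n",
	       !!(flags & (CLONE_DETACHED | (CSIGNAL & ~CLONE_NEWTIME))));
	return 0;
}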
diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
index 8cf70f068d92..a45f3dfc8d14 100644
--- a/kernel/kcsan/Makefile
+++ b/kernel/kcsan/Makefile
@@ -16,6 +16,6 @@ obj-y := core.o debugfs.o report.o
KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
-CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
+CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer
CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN)
obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index af017e038b48..0d18c3969f90 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2084,6 +2084,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
+ if (task_on_rq_migrating(p))
+ flags |= ENQUEUE_MIGRATED;
+
enqueue_task(rq, p, flags);
p->on_rq = TASK_ON_RQ_QUEUED;
@@ -8414,14 +8417,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
if (len & (sizeof(unsigned long)-1))
return -EINVAL;
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
unsigned int retlen = min(len, cpumask_size());
- if (copy_to_user(user_mask_ptr, mask, retlen))
+ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
ret = -EFAULT;
else
ret = retlen;
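This hunk and the kernel/compat.c one above apply the same hardening: sched_getaffinity() only writes bits for present CPUs, yet the syscall copies cpumask_size() bytes back to userspace, so an unzeroed allocation could leak stale kernel memory in the tail. A hedged userspace analogue (the sizes are made up):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MASK_BYTES 16	/* stand-in for cpumask_size() */
#define USED_BYTES 2	/* stand-in for the bytes covering nr_cpu_ids */

int main(void)
{
	/* alloc_cpumask_var() analogue: contents are indeterminate. */
	unsigned char *mask = malloc(MASK_BYTES);

	if (!mask)
		return 1;

	memset(mask, 0xAA, MASK_BYTES);	/* simulate stale heap contents */
	memset(mask, 0xFF, USED_BYTES);	/* the bits the scheduler actually set */

	/* copy_to_user() would ship bytes USED_BYTES..MASK_BYTES-1 as-is. */
	printf("tail byte that would leak: 0x%02x\n", mask[USED_BYTES]);

	free(mask);	/* zalloc_cpumask_var() zeroes the whole mask instead */
	return 0;
}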
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a1b1f855b96..6986ea31c984 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}
+static inline bool entity_is_long_sleeper(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq;
+ u64 sleep_time;
+
+ if (se->exec_start == 0)
+ return false;
+
+ cfs_rq = cfs_rq_of(se);
+
+ sleep_time = rq_clock_task(rq_of(cfs_rq));
+
+ /* Can happen while migrating because of clock task divergence */
+ if (sleep_time <= se->exec_start)
+ return false;
+
+ sleep_time -= se->exec_start;
+ if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
+ return true;
+
+ return false;
+}
+
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
u64 vruntime = cfs_rq->min_vruntime;
- u64 sleep_time;
/*
* The 'current' period is already promised to the current tasks,
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
/*
* Pull vruntime of the entity being placed to the base level of
- * cfs_rq, to prevent boosting it if placed backwards. If the entity
- * slept for a long time, don't even try to compare its vruntime with
- * the base as it may be too far off and the comparison may get
- * inversed due to s64 overflow.
- */
- sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
- if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
+ * cfs_rq, to prevent boosting it if placed backwards.
+ * However, min_vruntime can advance much faster than real time, with
+ * the extreme being when an entity with the minimal weight always runs
+ * on the cfs_rq. If the waking entity slept for a long time, its
+ * vruntime difference from min_vruntime may overflow s64 and their
+ * comparison may get inverted, so ignore the entity's original
+ * vruntime in that case.
+ * The maximal vruntime speedup is given by the ratio of normal to
+ * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
+ * When placing a migrated waking entity, its exec_start has been set
+ * from a different rq. To account for a possible divergence between
+ * the new and previous rq's task clocks due to IRQ and stolen time,
+ * we take an additional margin.
+ * So, cutting off on the sleep time of
+ * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
+ * should be safe.
+ */
+ if (entity_is_long_sleeper(se))
se->vruntime = vruntime;
else
se->vruntime = max_vruntime(se->vruntime, vruntime);
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
+ /* Entity has migrated, no longer consider this task hot */
+ if (flags & ENQUEUE_MIGRATED)
+ se->exec_start = 0;
check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
/* Tell new CPU we are migrated */
se->avg.last_update_time = 0;
- /* We have migrated, no longer consider this task hot */
- se->exec_start = 0;
-
update_scan_period(p, new_cpu);
}
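The cutoff in the new comment can be sanity-checked directly; assuming scale_load_down(NICE_0_LOAD) resolves to 1024 (as it does on current kernels), 2^63 / 1024 nanoseconds comes out to roughly 104 days:

#include <stdio.h>

int main(void)
{
	unsigned long long nice_0_load = 1024;	/* scale_load_down(NICE_0_LOAD) */
	unsigned long long cutoff_ns = (1ULL << 63) / nice_0_load;

	printf("cutoff: %llu ns ~= %.1f days\n",
	       cutoff_ns, (double)cutoff_ns / 1e9 / 3600 / 24);
	return 0;
}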
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 29baa97d0d53..0feea145bb29 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1564,7 +1564,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
key.flags = end; /* overload flags, as it is unsigned long */
for (pg = ftrace_pages_start; pg; pg = pg->next) {
- if (end < pg->records[0].ip ||
+ if (pg->index == 0 ||
+ end < pg->records[0].ip ||
start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
continue;
rec = bsearch(&key, pg->records, pg->index,
@@ -2591,7 +2592,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
arch_ftrace_set_direct_caller(fregs, addr);
}
-struct ftrace_ops direct_ops = {
+static struct ftrace_ops direct_ops = {
.func = call_direct_funcs,
.flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
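The first ftrace.c hunk guards against an empty pages entry: with pg->index == 0, the old bounds test dereferenced pg->records[pg->index - 1], one element before the array. A simplified stand-in (not the kernel's structures) shows the shape of the bug:

#include <stdio.h>

struct rec { unsigned long ip; };
struct pg { struct rec *records; int index; };

static int page_may_contain(struct pg *pg, unsigned long start, unsigned long end)
{
	if (pg->index == 0)	/* the added guard: skip empty pages */
		return 0;
	return !(end < pg->records[0].ip ||
		 start >= pg->records[pg->index - 1].ip);
}

int main(void)
{
	struct pg empty = { .records = NULL, .index = 0 };

	/* Without the guard this would read records[-1] (here: crash on NULL). */
	printf("empty page matches: %d\n", page_may_contain(&empty, 0, ~0UL));
	return 0;
}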
diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index 4850fdfe27f1..5a4b722b5045 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -146,7 +146,7 @@ static int __init test_gen_kprobe_cmd(void)
if (trace_event_file_is_valid(gen_kprobe_test))
gen_kprobe_test = NULL;
/* We got an error after creating the event, delete it */
- ret = kprobe_event_delete("gen_kprobe_test");
+ kprobe_event_delete("gen_kprobe_test");
goto out;
}
@@ -211,7 +211,7 @@ static int __init test_gen_kretprobe_cmd(void)
if (trace_event_file_is_valid(gen_kretprobe_test))
gen_kretprobe_test = NULL;
/* We got an error after creating the event, delete it */
- ret = kprobe_event_delete("gen_kretprobe_test");
+ kprobe_event_delete("gen_kretprobe_test");
goto out;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index af50d931b020..c6f47b6cfd5f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -354,10 +354,6 @@ static void rb_init_page(struct buffer_data_page *bpage)
local_set(&bpage->commit, 0);
}
-/*
- * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
- * this issue out.
- */
static void free_buffer_page(struct buffer_page *bpage)
{
free_page((unsigned long)bpage->page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 45551c7b4c36..937e9676dfd4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5167,6 +5167,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
+ .read_iter = seq_read_iter,
+ .splice_read = generic_file_splice_read,
.write = tracing_write_stub,
.llseek = tracing_lseek,
.release = tracing_release,
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 89877a18f933..486cca3c2b75 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1331,6 +1331,9 @@ static const char *hist_field_name(struct hist_field *field,
{
const char *field_name = "";
+ if (WARN_ON_ONCE(!field))
+ return field_name;
+
if (level > 1)
return field_name;
@@ -4235,6 +4238,15 @@ static int __create_val_field(struct hist_trigger_data *hist_data,
goto out;
}
+ /* Some types cannot be used as a value */
+ if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT |
+ HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2 |
+ HIST_FIELD_FL_SYM | HIST_FIELD_FL_SYM_OFFSET |
+ HIST_FIELD_FL_SYSCALL | HIST_FIELD_FL_STACKTRACE)) {
+ hist_err(file->tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(field_str));
+ ret = -EINVAL;
+ }
+
hist_data->fields[val_idx] = hist_field;
++hist_data->n_vals;
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index d440ddd5fd8b..2f37a6e68aa9 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -339,7 +339,7 @@ static void move_to_next_cpu(void)
cpumask_clear(current_mask);
cpumask_set_cpu(next_cpu, current_mask);
- sched_setaffinity(0, current_mask);
+ set_cpus_allowed_ptr(current, current_mask);
return;
change_mode:
@@ -446,7 +446,7 @@ static int start_single_kthread(struct trace_array *tr)
}
- sched_setaffinity(kthread->pid, current_mask);
+ set_cpus_allowed_ptr(kthread, current_mask);
kdata->kthread = kthread;
wake_up_process(kthread);
@@ -492,6 +492,10 @@ static int start_cpu_kthread(unsigned int cpu)
{
struct task_struct *kthread;
+ /* Do not start a new hwlatd thread if it is already running */
+ if (per_cpu(hwlat_per_cpu_data, cpu).kthread)
+ return 0;
+
kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
@@ -584,9 +588,6 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
*/
cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
- for_each_online_cpu(cpu)
- per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
-
for_each_cpu(cpu, current_mask) {
retval = start_cpu_kthread(cpu);
if (retval)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 04f0fdae19a1..9176bb7a9bb4 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -217,7 +217,7 @@ struct osnoise_variables {
/*
* Per-cpu runtime information.
*/
-DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
+static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
/*
* this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
@@ -240,7 +240,7 @@ struct timerlat_variables {
u64 count;
};
-DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
+static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
/*
* this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
@@ -332,7 +332,7 @@ struct timerlat_sample {
/*
* Protect the interface.
*/
-struct mutex interface_lock;
+static struct mutex interface_lock;
/*
* Tracer data.
@@ -2239,8 +2239,8 @@ static struct trace_min_max_param osnoise_print_stack = {
/*
* osnoise/timerlat_period: min 100 us, max 1 s
*/
-u64 timerlat_min_period = 100;
-u64 timerlat_max_period = 1000000;
+static u64 timerlat_min_period = 100;
+static u64 timerlat_max_period = 1000000;
static struct trace_min_max_param timerlat_period = {
.lock = &interface_lock,
.val = &osnoise_data.timerlat_period,
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a6f9bdd956c3..f10f403104e7 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -273,6 +273,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
if (ret < 0)
goto error;
+ ret = -ENOMEM;
pages = kcalloc(sizeof(struct page *), nr_pages, GFP_KERNEL);
if (!pages)
goto error;
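The watch_queue.c fix addresses the classic stale-return-code pattern: an earlier success leaves ret holding zero (or a positive value), and a later allocation failure jumps to the error label without updating it. A minimal sketch of the pattern, with invented names:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static long do_earlier_step(void)
{
	return 0;	/* succeeds, leaving ret == 0 */
}

static long set_size(size_t nr_pages)
{
	void **pages;
	long ret;

	ret = do_earlier_step();
	if (ret < 0)
		goto error;

	ret = -ENOMEM;	/* the added line: preset the failure code */
	pages = calloc(nr_pages, sizeof(*pages));
	if (!pages)
		goto error;

	free(pages);
	return 0;
error:
	return ret;	/* without the preset, this path could return 0 */
}

int main(void)
{
	printf("set_size() -> %ld\n", set_size(16));
	return 0;
}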