summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_watch.c8
-rw-r--r--kernel/cgroup.c35
-rw-r--r--kernel/configs/tiny.config8
-rw-r--r--kernel/cpu.c9
-rw-r--r--kernel/cpuset.c32
-rw-r--r--kernel/events/core.c49
-rw-r--r--kernel/events/ring_buffer.c15
-rw-r--r--kernel/exit.c7
-rw-r--r--kernel/fork.c37
-rw-r--r--kernel/irq/chip.c8
-rw-r--r--kernel/kexec_file.c3
-rw-r--r--kernel/memremap.c9
-rw-r--r--kernel/power/Kconfig4
-rw-r--r--kernel/power/hibernate.c21
-rw-r--r--kernel/power/main.c1
-rw-r--r--kernel/power/power.h2
-rw-r--r--kernel/power/qos.c11
-rw-r--r--kernel/power/snapshot.c22
-rw-r--r--kernel/power/suspend.c14
-rw-r--r--kernel/printk/nmi.c38
-rw-r--r--kernel/sched/core.c22
-rw-r--r--kernel/sched/cpufreq.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c122
-rw-r--r--kernel/sched/deadline.c5
-rw-r--r--kernel/sched/fair.c23
-rw-r--r--kernel/sched/rt.c5
-rw-r--r--kernel/sched/sched.h40
-rw-r--r--kernel/seccomp.c12
-rw-r--r--kernel/time/tick-sched.c3
-rw-r--r--kernel/trace/trace.c29
30 files changed, 419 insertions, 177 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d6709eb70970..0d302a87f21b 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -19,6 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/audit.h>
#include <linux/kthread.h>
@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
unsigned long ino;
dev_t dev;
- rcu_read_lock();
- exe_file = rcu_dereference(tsk->mm->exe_file);
+ exe_file = get_task_exe_file(tsk);
+ if (!exe_file)
+ return 0;
ino = exe_file->f_inode->i_ino;
dev = exe_file->f_inode->i_sb->s_dev;
- rcu_read_unlock();
+ fput(exe_file);
return audit_mark_compare(mark, ino, dev);
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..d6b729beba49 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3446,9 +3446,28 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
* Except for the root, subtree_control must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
- if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
- ret = -EBUSY;
- goto out_unlock;
+ if (enable && cgroup_parent(cgrp)) {
+ struct cgrp_cset_link *link;
+
+ /*
+ * Because namespaces pin csets too, @cgrp->cset_links
+ * might not be empty even when @cgrp is empty. Walk and
+ * verify each cset.
+ */
+ spin_lock_irq(&css_set_lock);
+
+ ret = 0;
+ list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+ if (css_set_populated(link->cset)) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ spin_unlock_irq(&css_set_lock);
+
+ if (ret)
+ goto out_unlock;
}
/* save and update control masks and prepare csses */
@@ -3899,7 +3918,9 @@ void cgroup_file_notify(struct cgroup_file *cfile)
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
- * Return the number of tasks in the cgroup.
+ * Return the number of tasks in the cgroup. The returned number can be
+ * higher than the actual number of tasks due to css_set references from
+ * namespace roots and temporary usages.
*/
static int cgroup_task_count(const struct cgroup *cgrp)
{
@@ -6270,6 +6291,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
if (cgroup_sk_alloc_disabled)
return;
+ /* Socket clone path */
+ if (skcd->val) {
+ cgroup_get(sock_cgroup_ptr(skcd));
+ return;
+ }
+
rcu_read_lock();
while (true) {
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index c2de56ab0fce..7fa0c4ae6394 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -1,4 +1,12 @@
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
CONFIG_KERNEL_XZ=y
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
CONFIG_SLOB=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 341bf80f80bd..ebbf027dd4a1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1024,12 +1024,13 @@ EXPORT_SYMBOL_GPL(cpu_up);
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
-int disable_nonboot_cpus(void)
+int freeze_secondary_cpus(int primary)
{
- int cpu, first_cpu, error = 0;
+ int cpu, error = 0;
cpu_maps_update_begin();
- first_cpu = cpumask_first(cpu_online_mask);
+ if (!cpu_online(primary))
+ primary = cpumask_first(cpu_online_mask);
/*
* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
@@ -1038,7 +1039,7 @@ int disable_nonboot_cpus(void)
pr_info("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
- if (cpu == first_cpu)
+ if (cpu == primary)
continue;
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c7fd2778ed50..2b4c20ab5bbe 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -325,8 +325,7 @@ static struct file_system_type cpuset_fs_type = {
/*
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus. The top
- * cpuset always has some cpus online.
+ * until we find one that does have some online cpus.
*
* One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask.
@@ -335,8 +334,20 @@ static struct file_system_type cpuset_fs_type = {
*/
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
- while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
+ while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
cs = parent_cs(cs);
+ if (unlikely(!cs)) {
+ /*
+ * The top cpuset doesn't have any online cpu as a
+ * consequence of a race between cpuset_hotplug_work
+ * and cpu hotplug notifier. But we know the top
+ * cpuset's effective_cpus is on its way to to be
+ * identical to cpu_online_mask.
+ */
+ cpumask_copy(pmask, cpu_online_mask);
+ return;
+ }
+ }
cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
}
@@ -2069,6 +2080,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
mutex_unlock(&cpuset_mutex);
}
+/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+ */
+static void cpuset_fork(struct task_struct *task)
+{
+ if (task_css_is_root(task, cpuset_cgrp_id))
+ return;
+
+ set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ task->mems_allowed = current->mems_allowed;
+}
+
struct cgroup_subsys cpuset_cgrp_subsys = {
.css_alloc = cpuset_css_alloc,
.css_online = cpuset_css_online,
@@ -2079,6 +2104,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .fork = cpuset_fork,
.legacy_cftypes = files,
.early_init = true,
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf7b942..fc9bb2225291 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info)
return 0;
}
-static int perf_event_restart(struct perf_event *event)
+static int perf_event_stop(struct perf_event *event, int restart)
{
struct stop_event_data sd = {
.event = event,
- .restart = 1,
+ .restart = restart,
};
int ret = 0;
@@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group)
.group = group,
.ret = 0,
};
- ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
- /* The event must have been read from an online CPU: */
- WARN_ON_ONCE(ret);
- ret = ret ? : data.ret;
+ /*
+ * Purposely ignore the smp_call_function_single() return
+ * value.
+ *
+ * If event->oncpu isn't a valid CPU it means the event got
+ * scheduled out and that will have updated the event count.
+ *
+ * Therefore, either way, we'll have an up-to-date event count
+ * after this.
+ */
+ (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+ ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
@@ -3921,7 +3929,7 @@ static void exclusive_event_destroy(struct perf_event *event)
static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
{
- if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+ if ((e1->pmu == e2->pmu) &&
(e1->cpu == e2->cpu ||
e1->cpu == -1 ||
e2->cpu == -1))
@@ -4837,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event,
spin_unlock_irqrestore(&rb->event_lock, flags);
}
+ /*
+ * Avoid racing with perf_mmap_close(AUX): stop the event
+ * before swizzling the event::rb pointer; if it's getting
+ * unmapped, its aux_mmap_count will be 0 and it won't
+ * restart. See the comment in __perf_pmu_output_stop().
+ *
+ * Data will inevitably be lost when set_output is done in
+ * mid-air, but then again, whoever does it like this is
+ * not in for the data anyway.
+ */
+ if (has_aux(event))
+ perf_event_stop(event, 0);
+
rcu_assign_pointer(event->rb, rb);
if (old_rb) {
@@ -6112,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_unlock_irqrestore(&ifh->lock, flags);
if (restart)
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
void perf_event_exec(void)
@@ -6156,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
/*
* In case of inheritance, it will be the parent that links to the
- * ring-buffer, but it will be the child that's actually using it:
+ * ring-buffer, but it will be the child that's actually using it.
+ *
+ * We are using event::rb to determine if the event should be stopped,
+ * however this may race with ring_buffer_attach() (through set_output),
+ * which will make us skip the event that actually needs to be stopped.
+ * So ring_buffer_attach() has to stop an aux event before re-assigning
+ * its rb pointer.
*/
if (rcu_dereference(parent->rb) == rb)
ro->err = __perf_event_stop(&sd);
@@ -6670,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
raw_spin_unlock_irqrestore(&ifh->lock, flags);
if (restart)
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
/*
@@ -7859,7 +7886,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
mmput(mm);
restart:
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
/*
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ae9b90dc9a5a..257fa460b846 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
if (!rb)
return NULL;
- if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+ if (!rb_has_aux(rb))
goto err;
/*
- * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
- * the aux buffer is in perf_mmap_close(), about to get freed.
+ * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(),
+ * about to get freed, so we leave immediately.
+ *
+ * Checking rb::aux_mmap_count and rb::refcount has to be done in
+ * the same order, see perf_mmap_close. Otherwise we end up freeing
+ * aux pages in this path, which is a bug, because in_atomic().
*/
if (!atomic_read(&rb->aux_mmap_count))
- goto err_put;
+ goto err;
+
+ if (!atomic_inc_not_zero(&rb->aux_refcount))
+ goto err;
/*
* Nesting is not supported for AUX area, make sure nested
diff --git a/kernel/exit.c b/kernel/exit.c
index 2f974ae042a6..091a78be3b09 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -848,12 +848,7 @@ void do_exit(long code)
TASKS_RCU(preempt_enable());
exit_notify(tsk, group_dead);
proc_exit_connector(tsk);
-#ifdef CONFIG_NUMA
- task_lock(tsk);
- mpol_put(tsk->mempolicy);
- tsk->mempolicy = NULL;
- task_unlock(tsk);
-#endif
+ mpol_put_task_policy(tsk);
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d4a866..beb31725f7e2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -799,6 +799,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
EXPORT_SYMBOL(get_mm_exe_file);
/**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if task's mm (if any) has no associated executable file or
+ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+ * User must release file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+ struct file *exe_file = NULL;
+ struct mm_struct *mm;
+
+ task_lock(task);
+ mm = task->mm;
+ if (mm) {
+ if (!(task->flags & PF_KTHREAD))
+ exe_file = get_mm_exe_file(mm);
+ }
+ task_unlock(task);
+ return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
+/**
* get_task_mm - acquire a reference to the task's mm
*
* Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
@@ -913,14 +936,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
deactivate_mm(tsk, mm);
/*
- * If we're exiting normally, clear a user-space tid field if
- * requested. We leave this alone when dying by signal, to leave
- * the value intact in a core dump, and to save the unnecessary
- * trouble, say, a killed vfork parent shouldn't touch this mm.
- * Userland only wants this done for a sys_exit.
+ * Signal userspace if we're not exiting with a core dump
+ * because we want to leave the value intact for debugging
+ * purposes.
*/
if (tsk->clear_child_tid) {
- if (!(tsk->flags & PF_SIGNALED) &&
+ if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
atomic_read(&mm->mm_users) > 1) {
/*
* We don't check the error code - if userspace has
@@ -1404,7 +1425,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
p->audit_context = NULL;
- threadgroup_change_begin(current);
cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
@@ -1556,6 +1576,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
INIT_LIST_HEAD(&p->thread_group);
p->task_works = NULL;
+ threadgroup_change_begin(current);
/*
* Ensure that the cgroup subsystem policies allow the new process to be
* forked. It should be noted the the new process's css_set can be changed
@@ -1656,6 +1677,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
bad_fork_cancel_cgroup:
cgroup_cancel_fork(p);
bad_fork_free_pid:
+ threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_thread:
@@ -1688,7 +1710,6 @@ bad_fork_cleanup_policy:
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
#endif
- threadgroup_change_end(current);
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 637389088b3f..26ba5654d9d5 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -820,6 +820,8 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
desc->name = name;
if (handle != handle_bad_irq && is_chained) {
+ unsigned int type = irqd_get_trigger_type(&desc->irq_data);
+
/*
* We're about to start this interrupt immediately,
* hence the need to set the trigger configuration.
@@ -828,8 +830,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
* chained interrupt. Reset it immediately because we
* do know better.
*/
- __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data));
- desc->handle_irq = handle;
+ if (type != IRQ_TYPE_NONE) {
+ __irq_set_trigger(desc, type);
+ desc->handle_irq = handle;
+ }
irq_settings_set_noprobe(desc);
irq_settings_set_norequest(desc);
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 503bc2d348e5..037c321c5618 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
return 0;
out:
vfree(pi->sechdrs);
+ pi->sechdrs = NULL;
+
vfree(pi->purgatory_buf);
+ pi->purgatory_buf = NULL;
return ret;
}
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 251d16b4cb41..b501e390bb34 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
arch_remove_memory(align_start, align_size);
+ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
pgmap_radix_release(res);
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
"%s: failed to free all reserved pages\n", __func__);
@@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
resource_size_t key, align_start, align_size, align_end;
+ pgprot_t pgprot = PAGE_KERNEL;
struct dev_pagemap *pgmap;
struct page_map *page_map;
int error, nid, is_ram;
@@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (nid < 0)
nid = numa_mem_id();
+ error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
+ align_size);
+ if (error)
+ goto err_pfn_remap;
+
error = arch_add_memory(nid, align_start, align_size, true);
if (error)
goto err_add_memory;
@@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
return __va(res->start);
err_add_memory:
+ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ err_pfn_remap:
err_radix:
pgmap_radix_release(res);
devres_free(page_map);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 68d3ebc12601..e8517b63eb37 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -186,7 +186,7 @@ config PM_SLEEP_DEBUG
config DPM_WATCHDOG
bool "Device suspend/resume watchdog"
- depends on PM_DEBUG && PSTORE
+ depends on PM_DEBUG && PSTORE && EXPERT
---help---
Sets up a watchdog timer to capture drivers that are
locked up attempting to suspend/resume a device.
@@ -197,7 +197,7 @@ config DPM_WATCHDOG
config DPM_WATCHDOG_TIMEOUT
int "Watchdog timeout in seconds"
range 1 120
- default 60
+ default 120
depends on DPM_WATCHDOG
config PM_TRACE
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 33c79b6105c5..b26dbc48c75b 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -306,8 +306,10 @@ static int create_image(int platform_mode)
if (error)
printk(KERN_ERR "PM: Error %d creating hibernation image\n",
error);
- if (!in_suspend)
+ if (!in_suspend) {
events_check_enabled = false;
+ clear_free_pages();
+ }
platform_leave(platform_mode);
@@ -1189,22 +1191,6 @@ static int __init nohibernate_setup(char *str)
return 1;
}
-static int __init page_poison_nohibernate_setup(char *str)
-{
-#ifdef CONFIG_PAGE_POISONING_ZERO
- /*
- * The zeroing option for page poison skips the checks on alloc.
- * since hibernation doesn't save free pages there's no way to
- * guarantee the pages will still be zeroed.
- */
- if (!strcmp(str, "on")) {
- pr_info("Disabling hibernation due to page poisoning\n");
- return nohibernate_setup(str);
- }
-#endif
- return 1;
-}
-
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
@@ -1212,4 +1198,3 @@ __setup("hibernate=", hibernate_setup);
__setup("resumewait", resumewait_setup);
__setup("resumedelay=", resumedelay_setup);
__setup("nohibernate", nohibernate_setup);
-__setup("page_poison=", page_poison_nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 5ea50b1b7595..281a697fd458 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -644,6 +644,7 @@ static int __init pm_init(void)
return error;
hibernate_image_size_init();
hibernate_reserved_size_init();
+ pm_states_init();
power_kobj = kobject_create_and_add("power", NULL);
if (!power_kobj)
return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 242d8b827dd5..56d1d0dedf76 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -110,6 +110,8 @@ extern int create_basic_memory_bitmaps(void);
extern void free_basic_memory_bitmaps(void);
extern int hibernate_preallocate_memory(void);
+extern void clear_free_pages(void);
+
/**
* Auxiliary structure used for reading the snapshot image data and
* metadata from and writing them to the list of page backup entries
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 97b0df71303e..168ff442ebde 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -482,7 +482,16 @@ void pm_qos_update_request(struct pm_qos_request *req,
return;
}
- cancel_delayed_work_sync(&req->work);
+ /*
+ * This function may be called very early during boot, for example,
+ * from of_clk_init(), where irq needs to stay disabled.
+ * cancel_delayed_work_sync() assumes that irq is enabled on
+ * invocation and re-enables it on return. Avoid calling it until
+ * workqueue is initialized.
+ */
+ if (keventd_up())
+ cancel_delayed_work_sync(&req->work);
+
__pm_qos_update_request(req, new_value);
}
EXPORT_SYMBOL_GPL(pm_qos_update_request);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index b02228411d57..4f0f0604f1c4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1132,6 +1132,28 @@ void free_basic_memory_bitmaps(void)
pr_debug("PM: Basic memory bitmaps freed\n");
}
+void clear_free_pages(void)
+{
+#ifdef CONFIG_PAGE_POISONING_ZERO
+ struct memory_bitmap *bm = free_pages_map;
+ unsigned long pfn;
+
+ if (WARN_ON(!(free_pages_map)))
+ return;
+
+ memory_bm_position_reset(bm);
+ pfn = memory_bm_next_pfn(bm);
+ while (pfn != BM_END_OF_MAP) {
+ if (pfn_valid(pfn))
+ clear_highpage(pfn_to_page(pfn));
+
+ pfn = memory_bm_next_pfn(bm);
+ }
+ memory_bm_position_reset(bm);
+ pr_info("PM: free pages cleared after restore\n");
+#endif /* PAGE_POISONING_ZERO */
+}
+
/**
* snapshot_additional_pages - Estimate the number of extra pages needed.
* @zone: Memory zone to carry out the computation for.
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 0acab9d7f96f..1e7f5da648d9 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -118,10 +118,18 @@ static bool valid_state(suspend_state_t state)
*/
static bool relative_states;
+void __init pm_states_init(void)
+{
+ /*
+ * freeze state should be supported even without any suspend_ops,
+ * initialize pm_states accordingly here
+ */
+ pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
+}
+
static int __init sleep_states_setup(char *str)
{
relative_states = !strncmp(str, "1", 1);
- pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
return 1;
}
@@ -211,7 +219,7 @@ static int platform_suspend_begin(suspend_state_t state)
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin)
return freeze_ops->begin();
- else if (suspend_ops->begin)
+ else if (suspend_ops && suspend_ops->begin)
return suspend_ops->begin(state);
else
return 0;
@@ -221,7 +229,7 @@ static void platform_resume_end(suspend_state_t state)
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
freeze_ops->end();
- else if (suspend_ops->end)
+ else if (suspend_ops && suspend_ops->end)
suspend_ops->end();
}
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index b69eb8a2876f..16bab471c7e2 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -99,27 +99,33 @@ again:
return add;
}
-/*
- * printk one line from the temporary buffer from @start index until
- * and including the @end index.
- */
-static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
+static void printk_nmi_flush_line(const char *text, int len)
{
- const char *buf = s->buffer + start;
-
/*
* The buffers are flushed in NMI only on panic. The messages must
* go only into the ring buffer at this stage. Consoles will get
* explicitly called later when a crashdump is not generated.
*/
if (in_nmi())
- printk_deferred("%.*s", (end - start) + 1, buf);
+ printk_deferred("%.*s", len, text);
else
- printk("%.*s", (end - start) + 1, buf);
+ printk("%.*s", len, text);
}
/*
+ * printk one line from the temporary buffer from @start index until
+ * and including the @end index.
+ */
+static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
+ int start, int end)
+{
+ const char *buf = s->buffer + start;
+
+ printk_nmi_flush_line(buf, (end - start) + 1);
+}
+
+/*
* Flush data from the associated per_CPU buffer. The function
* can be called either via IRQ work or independently.
*/
@@ -150,9 +156,11 @@ more:
* the buffer an unexpected way. If we printed something then
* @len must only increase.
*/
- if (i && i >= len)
- pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
- i, len);
+ if (i && i >= len) {
+ const char *msg = "printk_nmi_flush: internal error\n";
+
+ printk_nmi_flush_line(msg, strlen(msg));
+ }
if (!len)
goto out; /* Someone else has already flushed the buffer. */
@@ -166,14 +174,14 @@ more:
/* Print line by line. */
for (; i < size; i++) {
if (s->buffer[i] == '\n') {
- print_nmi_seq_line(s, last_i, i);
+ printk_nmi_flush_seq_line(s, last_i, i);
last_i = i + 1;
}
}
/* Check if there was a partial line. */
if (last_i < size) {
- print_nmi_seq_line(s, last_i, size - 1);
- pr_cont("\n");
+ printk_nmi_flush_seq_line(s, last_i, size - 1);
+ printk_nmi_flush_line("\n", strlen("\n"));
}
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a906f20fba7..44817c640e99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
success = 1; /* we're going to change ->state */
cpu = task_cpu(p);
+ /*
+ * Ensure we load p->on_rq _after_ p->state, otherwise it would
+ * be possible to, falsely, observe p->on_rq == 0 and get stuck
+ * in smp_cond_load_acquire() below.
+ *
+ * sched_ttwu_pending() try_to_wake_up()
+ * [S] p->on_rq = 1; [L] P->state
+ * UNLOCK rq->lock -----.
+ * \
+ * +--- RMB
+ * schedule() /
+ * LOCK rq->lock -----'
+ * UNLOCK rq->lock
+ *
+ * [task p]
+ * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq
+ *
+ * Pairs with the UNLOCK+LOCK on rq->lock from the
+ * last wakeup of our task and the schedule that got our task
+ * current.
+ */
+ smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
goto stat;
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 1141954e73b4..dbc51442ecbc 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
*/
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max))
+ unsigned int flags))
{
if (WARN_ON(!data || !func))
return;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a84641b222c1..69e06898997d 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpufreq.h>
-#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>
@@ -48,11 +47,14 @@ struct sugov_cpu {
struct sugov_policy *sg_policy;
unsigned int cached_raw_freq;
+ unsigned long iowait_boost;
+ unsigned long iowait_boost_max;
+ u64 last_update;
/* The fields below are only needed when sharing a policy. */
unsigned long util;
unsigned long max;
- u64 last_update;
+ unsigned int flags;
};
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
return cpufreq_driver_resolve_freq(policy, freq);
}
+static void sugov_get_util(unsigned long *util, unsigned long *max)
+{
+ struct rq *rq = this_rq();
+ unsigned long cfs_max;
+
+ cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+
+ *util = min(rq->cfs.avg.util_avg, cfs_max);
+ *max = cfs_max;
+}
+
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned int flags)
+{
+ if (flags & SCHED_CPUFREQ_IOWAIT) {
+ sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ } else if (sg_cpu->iowait_boost) {
+ s64 delta_ns = time - sg_cpu->last_update;
+
+ /* Clear iowait_boost if the CPU apprears to have been idle. */
+ if (delta_ns > TICK_NSEC)
+ sg_cpu->iowait_boost = 0;
+ }
+}
+
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
+ unsigned long *max)
+{
+ unsigned long boost_util = sg_cpu->iowait_boost;
+ unsigned long boost_max = sg_cpu->iowait_boost_max;
+
+ if (!boost_util)
+ return;
+
+ if (*util * boost_max < *max * boost_util) {
+ *util = boost_util;
+ *max = boost_max;
+ }
+ sg_cpu->iowait_boost >>= 1;
+}
+
static void sugov_update_single(struct update_util_data *hook, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
+ unsigned long util, max;
unsigned int next_f;
+ sugov_set_iowait_boost(sg_cpu, time, flags);
+ sg_cpu->last_update = time;
+
if (!sugov_should_update_freq(sg_policy, time))
return;
- next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
- get_next_freq(sg_cpu, util, max);
+ if (flags & SCHED_CPUFREQ_RT_DL) {
+ next_f = policy->cpuinfo.max_freq;
+ } else {
+ sugov_get_util(&util, &max);
+ sugov_iowait_boost(sg_cpu, &util, &max);
+ next_f = get_next_freq(sg_cpu, util, max);
+ }
sugov_update_commit(sg_policy, time, next_f);
}
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
- unsigned long util, unsigned long max)
+ unsigned long util, unsigned long max,
+ unsigned int flags)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
u64 last_freq_update_time = sg_policy->last_freq_update_time;
unsigned int j;
- if (util == ULONG_MAX)
+ if (flags & SCHED_CPUFREQ_RT_DL)
return max_f;
+ sugov_iowait_boost(sg_cpu, &util, &max);
+
for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu;
unsigned long j_util, j_max;
@@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
* frequency update and the time elapsed between the last update
* of the CPU utilization and the last frequency update is long
* enough, don't take the CPU into account as it probably is
- * idle now.
+ * idle now (and clear iowait_boost for it).
*/
delta_ns = last_freq_update_time - j_sg_cpu->last_update;
- if (delta_ns > TICK_NSEC)
+ if (delta_ns > TICK_NSEC) {
+ j_sg_cpu->iowait_boost = 0;
continue;
-
- j_util = j_sg_cpu->util;
- if (j_util == ULONG_MAX)
+ }
+ if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
return max_f;
+ j_util = j_sg_cpu->util;
j_max = j_sg_cpu->max;
if (j_util * max > j_max * util) {
util = j_util;
max = j_max;
}
+
+ sugov_iowait_boost(j_sg_cpu, &util, &max);
}
return get_next_freq(sg_cpu, util, max);
}
static void sugov_update_shared(struct update_util_data *hook, u64 time,
- unsigned long util, unsigned long max)
+ unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+ unsigned long util, max;
unsigned int next_f;
+ sugov_get_util(&util, &max);
+
raw_spin_lock(&sg_policy->update_lock);
sg_cpu->util = util;
sg_cpu->max = max;
+ sg_cpu->flags = flags;
+
+ sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
if (sugov_should_update_freq(sg_policy, time)) {
- next_f = sugov_next_freq_shared(sg_cpu, util, max);
+ next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
sugov_update_commit(sg_policy, time, next_f);
}
@@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_cpu->sg_policy = sg_policy;
if (policy_is_shared(policy)) {
- sg_cpu->util = ULONG_MAX;
+ sg_cpu->util = 0;
sg_cpu->max = 0;
+ sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->last_update = 0;
sg_cpu->cached_raw_freq = 0;
+ sg_cpu->iowait_boost = 0;
+ sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
sugov_update_shared);
} else {
@@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = {
.limits = sugov_limits,
};
-static int __init sugov_module_init(void)
-{
- return cpufreq_register_governor(&schedutil_gov);
-}
-
-static void __exit sugov_module_exit(void)
-{
- cpufreq_unregister_governor(&schedutil_gov);
-}
-
-MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
-MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
-MODULE_LICENSE("GPL");
-
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &schedutil_gov;
}
-
-fs_initcall(sugov_module_init);
-#else
-module_init(sugov_module_init);
#endif
-module_exit(sugov_module_exit);
+
+static int __init sugov_register(void)
+{
+ return cpufreq_register_governor(&schedutil_gov);
+}
+fs_initcall(sugov_register);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1ce8867283dc..974779656999 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq)
return;
}
- /* kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
+ /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34f1521..a5cd07b25aa1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
- struct rq *rq = rq_of(cfs_rq);
- int cpu = cpu_of(rq);
-
- if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
- unsigned long max = rq->cpu_capacity_orig;
-
+ if (&this_rq()->cfs == cfs_rq) {
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
@@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
*
* See cpu_util().
*/
- cpufreq_update_util(rq_clock(rq),
- min(cfs_rq->avg.util_avg, max), max);
+ cpufreq_update_util(rq_of(cfs_rq), 0);
}
}
@@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
static inline void update_load_avg(struct sched_entity *se, int not_used)
{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- struct rq *rq = rq_of(cfs_rq);
-
- cpufreq_trigger_update(rq_clock(rq));
+ cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
}
static inline void
@@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ /*
+ * If in_iowait is set, the code below may not trigger any cpufreq
+ * utilization updates, so do it here explicitly with the IOWAIT flag
+ * passed.
+ */
+ if (p->in_iowait)
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+
for_each_sched_entity(se) {
if (se->on_rq)
break;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b722691..2516b8df6dbb 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq)
if (unlikely((s64)delta_exec <= 0))
return;
- /* Kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
+ /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc5114004..b7fc1ced4380 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
- * @time: Current time.
- * @util: Current utilization.
- * @max: Utilization ceiling.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
*
- * This function is called by the scheduler on every invocation of
- * update_load_avg() on the CPU whose utilization is being updated.
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
*
* It can only be called from RCU-sched read-side critical sections.
- */
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
-{
- struct update_util_data *data;
-
- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
- if (data)
- data->func(data, time, util, max);
-}
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
-static inline void cpufreq_trigger_update(u64 time)
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
+{
+ struct update_util_data *data;
+
+ data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+ if (data)
+ data->func(data, rq_clock(rq), flags);
+}
+
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
{
- cpufreq_update_util(time, ULONG_MAX, 0);
+ if (cpu_of(rq) == smp_processor_id())
+ cpufreq_update_util(rq, flags);
}
#else
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef6c6c3f9d8a..0db7c8a2afe2 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
ptrace_event(PTRACE_EVENT_SECCOMP, data);
/*
* The delivery of a fatal signal during event
- * notification may silently skip tracer notification.
- * Terminating the task now avoids executing a system
- * call that may not be intended.
+ * notification may silently skip tracer notification,
+ * which could leave us with a potentially unmodified
+ * syscall that the tracer would have liked to have
+ * changed. Since the process is about to die, we just
+ * force the syscall to be skipped and let the signal
+ * kill the process and correctly handle any tracer exit
+ * notifications.
*/
if (fatal_signal_pending(current))
- do_exit(SIGSYS);
+ goto skip;
/* Check if the tracer forced the syscall to be skipped. */
this_syscall = syscall_get_nr(current, task_pt_regs(current));
if (this_syscall < 0)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 204fdc86863d..2ec7c00228f3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
ktime_t now, expires;
int cpu = smp_processor_id();
+ now = tick_nohz_start_idle(ts);
+
if (can_stop_idle_tick(cpu, ts)) {
int was_stopped = ts->tick_stopped;
- now = tick_nohz_start_idle(ts);
ts->idle_calls++;
expires = tick_nohz_stop_sched_tick(ts, now, cpu);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dade4c9559cc..7bc56762ca35 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5124,19 +5124,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
struct trace_iterator *iter = filp->private_data;
ssize_t sret;
- /* return any leftover data */
- sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
- if (sret != -EBUSY)
- return sret;
-
- trace_seq_init(&iter->seq);
-
/*
* Avoid more than one consumer on a single file descriptor
* This is just a matter of traces coherency, the ring buffer itself
* is protected.
*/
mutex_lock(&iter->mutex);
+
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+ goto out;
+
+ trace_seq_init(&iter->seq);
+
if (iter->trace->read) {
sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
if (sret)
@@ -6163,9 +6164,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
return -EBUSY;
#endif
- if (splice_grow_spd(pipe, &spd))
- return -ENOMEM;
-
if (*ppos & (PAGE_SIZE - 1))
return -EINVAL;
@@ -6175,6 +6173,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
len &= PAGE_MASK;
}
+ if (splice_grow_spd(pipe, &spd))
+ return -ENOMEM;
+
again:
trace_access_lock(iter->cpu_file);
entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
@@ -6232,19 +6233,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
/* did we read anything? */
if (!spd.nr_pages) {
if (ret)
- return ret;
+ goto out;
+ ret = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
- return -EAGAIN;
+ goto out;
ret = wait_on_pipe(iter, true);
if (ret)
- return ret;
+ goto out;
goto again;
}
ret = splice_to_pipe(pipe, &spd);
+out:
splice_shrink_spd(&spd);
return ret;