diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2009-12-10 15:44:54 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2009-12-10 15:44:54 +1100 |
commit | 7822ef68445b137025d3fd71de97a9a68af78a09 (patch) | |
tree | e72ec43ab42ce3fb61366267dd364168084191a5 | |
parent | ab5ab7be7acab4c597d542af3c835605e7c046d2 (diff) | |
parent | 33ac6d0c0089864063411732c263dab152178caf (diff) |
Merge remote branch 'limits/writable_limits'
Conflicts:
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/unistd_32.h
arch/x86/include/asm/unistd_64.h
arch/x86/kernel/syscall_table_32.S
48 files changed, 451 insertions, 153 deletions
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 599b233bef75..a97f838835ad 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2292,7 +2292,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) * return -ENOMEM; */ - if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (size > task_rlimit(task, RLIMIT_MEMLOCK)) return -ENOMEM; /* diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 92ed83f34036..b6ba74a83e54 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -129,7 +129,7 @@ ia64_brk (unsigned long brk) goto out; /* Check against rlimit.. */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + rlim = rlimit(RLIMIT_DATA); if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) goto out; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 1857766a63c1..dcd8eb739faa 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -91,7 +91,7 @@ dma_mark_clean(void *addr, size_t size) inline void ia64_set_rbs_bot (void) { - unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16; + unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16; if (stack_size > MAX_USER_STACK_SIZE) stack_size = MAX_USER_STACK_SIZE; diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 0d957a4c70fe..5a783d8e8e8e 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c @@ -47,7 +47,7 @@ static inline int mmap_is_legacy(void) if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; @@ -77,7 +77,7 @@ static unsigned long mmap_rnd(void) static inline unsigned long mmap_base(void) { - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long 
gap = rlimit(RLIMIT_STACK); if (gap < MIN_GAP) gap = MIN_GAP; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index c4d4a19235e0..eea120229cdb 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -54,7 +54,7 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, */ static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset) { - unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; + unsigned long limit = rlimit(RLIMIT_CORE); ssize_t written; if (*foffset + nr > limit) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index f4558ccf02b9..869efbaed3ea 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -40,7 +40,7 @@ static inline unsigned long mmap_base(void) { - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long gap = rlimit(RLIMIT_STACK); if (gap < MIN_GAP) gap = MIN_GAP; @@ -61,7 +61,7 @@ static inline int mmap_is_legacy(void) #endif return sysctl_legacy_va_layout || (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; + rlimit(RLIMIT_STACK) == RLIM_INFINITY; } #ifndef CONFIG_64BIT diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index e2d102447a43..f4392832a9da 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -361,6 +361,7 @@ EXPORT_SYMBOL(get_fb_unmapped_area); void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; + unsigned long gap; if (current->flags & PF_RANDOMIZE) { random_factor = get_random_int(); @@ -375,9 +376,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ + gap = rlimit(RLIMIT_STACK); if (!test_thread_flag(TIF_32BIT) || (current->personality 
& ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || + gap == RLIM_INFINITY || sysctl_legacy_va_layout) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; @@ -385,9 +387,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) } else { /* We know it's 32-bit */ unsigned long task_size = STACK_TOP32; - unsigned long gap; - gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; if (gap < 128 * 1024 * 1024) gap = 128 * 1024 * 1024; if (gap > (task_size / 6 * 5)) diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2a4d073d2cf1..06474788723f 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -297,7 +297,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) * size limits imposed on them by creating programs with large * arrays in the data or bss. */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + rlim = rlimit(RLIMIT_DATA); if (rlim >= RLIM_INFINITY) rlim = ~0; if (ex.a_data + ex.a_bss > rlim) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index d2e8911c5cf8..f8551f21c4d8 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -844,4 +844,6 @@ ia32_sys_call_table: .quad compat_sys_recvmmsg .quad sys_fanotify_init .quad sys32_fanotify_mark + .quad compat_sys_getprlimit /* 340 */ + .quad compat_sys_setprlimit ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index cf70e9e17dbc..64c7765ea4a1 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -345,10 +345,12 @@ #define __NR_recvmmsg 337 #define __NR_fanotify_init 338 #define __NR_fanotify_mark 339 +#define __NR_getprlimit 340 +#define __NR_setprlimit 341 #ifdef __KERNEL__ -#define NR_syscalls 340 +#define NR_syscalls 342 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h 
b/arch/x86/include/asm/unistd_64.h index 7fbd20f943f6..78e2e0bf1394 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -667,6 +667,10 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) __SYSCALL(__NR_fanotify_init, sys_fanotify_init) #define __NR_fanotify_mark 301 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) +#define __NR_getprlimit 302 +__SYSCALL(__NR_getprlimit, sys_getprlimit) +#define __NR_setprlimit 303 +__SYSCALL(__NR_setprlimit, sys_setprlimit) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ca486c74897d..2bd2e7f535c7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -339,3 +339,5 @@ ENTRY(sys_call_table) .long sys_recvmmsg .long sys_fanotify_init .long sys_fanotify_mark + .long sys_getprlimit /* 340 */ + .long sys_setprlimit diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c8191defc38a..1dab5194fd9d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -71,7 +71,7 @@ static int mmap_is_legacy(void) if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; @@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void) static unsigned long mmap_base(void) { - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long gap = rlimit(RLIMIT_STACK); if (gap < MIN_GAP) gap = MIN_GAP; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 6f7c096abf13..4f906f0614f0 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -136,7 +136,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, down_write(&current->mm->mmap_sem); locked = npages + current->mm->locked_vm; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + 
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 82878e348627..eb7d59abd12d 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -59,8 +59,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages, size_t got; int ret; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> - PAGE_SHIFT; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (num_pages > lock_limit) { ret = -ENOMEM; diff --git a/fs/attr.c b/fs/attr.c index 96d394bdaddf..34ade0e04886 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -82,7 +82,7 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset) if (inode->i_size < offset) { unsigned long limit; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + limit = rlimit(RLIMIT_FSIZE); if (limit != RLIM_INFINITY && offset > limit) goto out_sig; if (offset > inode->i_sb->s_maxbytes) diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index b639dcf7c778..06d892e21669 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -246,7 +246,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) * size limits imposed on them by creating programs with large * arrays in the data or bss. */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + rlim = rlimit(RLIMIT_DATA); if (rlim >= RLIM_INFINITY) rlim = ~0; if (ex.a_data + ex.a_bss > rlim) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index a2796651e756..b78b9de64514 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -501,7 +501,7 @@ static int load_flat_file(struct linux_binprm * bprm, * size limits imposed on them by creating programs with large * arrays in the data or bss. 
*/ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + rlim = rlimit(RLIMIT_DATA); if (rlim >= RLIM_INFINITY) rlim = ~0; if (data_len + bss_len > rlim) { diff --git a/fs/exec.c b/fs/exec.c index f01cfe94c929..6ae069018761 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -195,7 +195,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > rlim[RLIMIT_STACK].rlim_cur / 4) { + if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { put_page(page); return NULL; } @@ -574,7 +574,7 @@ int setup_arg_pages(struct linux_binprm *bprm, #ifdef CONFIG_STACK_GROWSUP /* Limit stack size to 1GB */ - stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max; + stack_base = rlimit_max(RLIMIT_STACK); if (stack_base > (1 << 30)) stack_base = 1 << 30; @@ -1499,7 +1499,7 @@ static int format_corename(char *corename, long signr) /* core limit size */ case 'c': rc = snprintf(out_ptr, out_end - out_ptr, - "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur); + "%lu", rlimit(RLIMIT_CORE)); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1758,7 +1758,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) int retval = 0; int flag = 0; int ispipe = 0; - unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; + unsigned long core_limit = rlimit(RLIMIT_CORE); char **helper_argv = NULL; int helper_argc = 0; int dump_count = 0; diff --git a/fs/fcntl.c b/fs/fcntl.c index 2cf93ec40a67..09c6271fe26e 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -344,7 +344,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: - if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + if (arg >= rlimit(RLIMIT_NOFILE)) break; err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? 
O_CLOEXEC : 0); if (err >= 0) { diff --git a/fs/file.c b/fs/file.c index 87e129030ab1..d1c21d8e428d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -257,7 +257,7 @@ int expand_files(struct files_struct *files, int nr) * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ - if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + if (nr >= rlimit(RLIMIT_NOFILE)) return -EMFILE; /* Do we need to expand? */ diff --git a/fs/proc/array.c b/fs/proc/array.c index 4badde179b18..2e5c2a34e06e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -266,7 +266,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); qsize = atomic_read(&__task_cred(p)->user->sigpending); - qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; + qlim = task_rlimit(p, RLIMIT_SIGPENDING); unlock_task_sighand(p, &flags); } @@ -502,7 +502,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, cutime = sig->cutime; cstime = sig->cstime; cgtime = sig->cgtime; - rsslim = sig->rlim[RLIMIT_RSS].rlim_cur; + rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); /* add up live thread stats at the group level */ if (whole) { diff --git a/fs/proc/base.c b/fs/proc/base.c index af643b5aefe8..9fdb990fc6d4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -477,19 +477,30 @@ static const struct limit_names lnames[RLIM_NLIMITS] = { }; /* Display limits for a process */ -static int proc_pid_limits(struct task_struct *task, char *buffer) +static ssize_t limits_read(struct file *file, char __user *buf, size_t rcount, + loff_t *ppos) { - unsigned int i; - int count = 0; - unsigned long flags; - char *bufptr = buffer; - struct rlimit rlim[RLIM_NLIMITS]; + struct task_struct *task; + unsigned long flags; + unsigned int i; + ssize_t count = 0; + char *bufptr; - if (!lock_task_sighand(task, &flags)) + task = 
get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + if (!lock_task_sighand(task, &flags)) { + put_task_struct(task); return 0; + } memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); + put_task_struct(task); + + bufptr = (char *)__get_free_page(GFP_TEMPORARY); + if (!bufptr) + return -ENOMEM; /* * print the file header @@ -518,9 +529,81 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) count += sprintf(&bufptr[count], "\n"); } + count = simple_read_from_buffer(buf, rcount, ppos, bufptr, count); + + free_page((unsigned long)bufptr); + + return count; +} + +static ssize_t limits_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + char str[32 + 1 + 16 + 1 + 16 + 1], *delim, *next; + struct rlimit new_rlimit; + unsigned int i; + int ret; + + if (!task) { + count = -ESRCH; + goto out; + } + if (copy_from_user(str, buf, min(count, sizeof(str) - 1))) { + count = -EFAULT; + goto put_task; + } + + str[min(count, sizeof(str) - 1)] = 0; + + delim = strchr(str, '='); + if (!delim) { + count = -EINVAL; + goto put_task; + } + *delim++ = 0; /* for easy 'str' usage */ + new_rlimit.rlim_cur = simple_strtoul(delim, &next, 0); + if (*next != ':') { + if (strncmp(delim, "unlimited:", 10)) { + count = -EINVAL; + goto put_task; + } + new_rlimit.rlim_cur = RLIM_INFINITY; + next = delim + 9; /* move to ':' */ + } + delim = next + 1; + new_rlimit.rlim_max = simple_strtoul(delim, &next, 0); + if (*next != 0) { + if (strcmp(delim, "unlimited")) { + count = -EINVAL; + goto put_task; + } + new_rlimit.rlim_max = RLIM_INFINITY; + } + + for (i = 0; i < RLIM_NLIMITS; i++) + if (!strcmp(str, lnames[i].name)) + break; + if (i >= RLIM_NLIMITS) { + count = -EINVAL; + goto put_task; + } + + ret = do_setrlimit(task, i, &new_rlimit); + if (ret) + count = ret; + +put_task: + put_task_struct(task); +out: 
return count; } +static const struct file_operations proc_pid_limits_operations = { + .read = limits_read, + .write = limits_write, +}; + #ifdef CONFIG_HAVE_ARCH_TRACEHOOK static int proc_pid_syscall(struct task_struct *task, char *buffer) { @@ -2500,7 +2583,7 @@ static const struct pid_entry tgid_base_stuff[] = { INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), - INF("limits", S_IRUSR, proc_pid_limits), + REG("limits", S_IRUSR|S_IWUSR, proc_pid_limits_operations), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif @@ -2834,7 +2917,7 @@ static const struct pid_entry tid_base_stuff[] = { INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), - INF("limits", S_IRUSR, proc_pid_limits), + REG("limits", S_IRUSR|S_IWUSR, proc_pid_limits_operations), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif diff --git a/fs/select.c b/fs/select.c index fd38ce2e32e3..73715e90030f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -821,7 +821,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct poll_list *walk = head; unsigned long todo = nfds; - if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL; len = min_t(unsigned int, nfds, N_STACK_PPS); diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 7c38c147e5e6..0b3ea8bc4e3b 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -622,9 +622,13 @@ __SYSCALL(__NR_move_pages, sys_move_pages) __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 241 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_getprlimit 242 +__SYSCALL(__NR_getprlimit, sys_getprlimit) +#define __NR_setprlimit 243 +__SYSCALL(__NR_setprlimit, sys_setprlimit) #undef 
__NR_syscalls -#define __NR_syscalls 242 +#define __NR_syscalls 244 /* * All syscalls below here should go away really, diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 24f050d2893c..dbee58e4d60d 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -141,7 +141,6 @@ header-y += qnxtypes.h header-y += qnx4_fs.h header-y += radeonfb.h header-y += raw.h -header-y += resource.h header-y += romfs_fs.h header-y += rose.h header-y += serial_reg.h @@ -326,6 +325,7 @@ unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h unifdef-y += reiserfs_xattr.h +unifdef-y += resource.h unifdef-y += route.h unifdef-y += rtc.h unifdef-y += rtnetlink.h diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 4f71bf4e628c..3e23844a6990 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -117,6 +117,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, long clock_nanosleep_restart(struct restart_block *restart_block); -void update_rlimit_cpu(unsigned long rlim_new); +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); #endif diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e626082..cf8dc96653ee 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -3,8 +3,6 @@ #include <linux/time.h> -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,14 @@ struct rlimit { */ #include <asm/resource.h> +#ifdef __KERNEL__ + +struct task_struct; + int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +int do_setrlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim); + +#endif /* __KERNEL__ */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 89115ec7d43f..9521759b8fc8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2585,6 +2585,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct 
task_struct *p) } #endif /* CONFIG_MM_OWNER */ +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #define TASK_STATE_TO_CHAR_STR "RSDTtZX" #endif /* __KERNEL__ */ diff --git a/include/linux/security.h b/include/linux/security.h index 466cbadbd1ef..9c3a43b20ce7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1591,7 +1591,8 @@ struct security_operations { int (*task_setnice) (struct task_struct *p, int nice); int (*task_setioprio) (struct task_struct *p, int ioprio); int (*task_getioprio) (struct task_struct *p); - int (*task_setrlimit) (unsigned int resource, struct rlimit *new_rlim); + int (*task_setrlimit) (struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); int (*task_setscheduler) (struct task_struct *p, int policy, struct sched_param *lp); int (*task_getscheduler) (struct task_struct *p); @@ -1856,7 +1857,8 @@ int security_task_setgroups(struct group_info *group_info); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); -int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim); +int security_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); int security_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); int security_task_getscheduler(struct task_struct *p); @@ -2472,7 +2474,8 @@ static inline int security_task_getioprio(struct 
task_struct *p) return 0; } -static inline int security_task_setrlimit(unsigned int resource, +static inline int security_task_setrlimit(struct task_struct *p, + unsigned int resource, struct rlimit *new_rlim) { return 0; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 79d82d1f3c0b..c22067006ffa 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -657,11 +657,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name); asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim); +asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource, + struct rlimit __user *rlim); #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64)) asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim); #endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim); +asmlinkage long sys_setprlimit(pid_t pid, unsigned int resource, + struct rlimit __user *rlim); asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ee9d69707c0a..f5d4bd25a83d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -153,7 +153,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > - p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) { + task_rlimit(p, RLIMIT_MSGQUEUE)) { spin_unlock(&mq_lock); goto out_inode; } diff --git a/ipc/shm.c b/ipc/shm.c index 464694e0aa4a..757f5967b39b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -761,8 +761,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) if (euid != shp->shm_perm.uid && euid != shp->shm_perm.cuid) goto out_unlock; - if (cmd == SHM_LOCK && - !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) goto out_unlock; } diff --git a/kernel/compat.c b/kernel/compat.c index 
f6c204f07ea6..c1d57043676b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -274,6 +274,39 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, return ret; } +static int get_compat_rlimit(struct rlimit *dst, + const struct compat_rlimit __user *src) +{ + if (!access_ok(VERIFY_READ, src, sizeof(*src)) || + __get_user(dst->rlim_cur, &src->rlim_cur) || + __get_user(dst->rlim_max, &src->rlim_max)) + return -EFAULT; + + if (dst->rlim_cur == COMPAT_RLIM_INFINITY) + dst->rlim_cur = RLIM_INFINITY; + if (dst->rlim_max == COMPAT_RLIM_INFINITY) + dst->rlim_max = RLIM_INFINITY; + return 0; +} + +static int put_compat_rlimit(const struct rlimit *src, + struct compat_rlimit __user *dst) +{ + struct rlimit r = *src; + + if (r.rlim_cur > COMPAT_RLIM_INFINITY) + r.rlim_cur = COMPAT_RLIM_INFINITY; + if (r.rlim_max > COMPAT_RLIM_INFINITY) + r.rlim_max = COMPAT_RLIM_INFINITY; + + if (!access_ok(VERIFY_WRITE, dst, sizeof(*dst)) || + __put_user(r.rlim_cur, &dst->rlim_cur) || + __put_user(r.rlim_max, &dst->rlim_max)) + return -EFAULT; + + return 0; +} + asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { @@ -284,17 +317,12 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource, if (resource >= RLIM_NLIMITS) return -EINVAL; - if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) || - __get_user(r.rlim_cur, &rlim->rlim_cur) || - __get_user(r.rlim_max, &rlim->rlim_max)) - return -EFAULT; + ret = get_compat_rlimit(&r, rlim); + if (ret) + return ret; - if (r.rlim_cur == COMPAT_RLIM_INFINITY) - r.rlim_cur = RLIM_INFINITY; - if (r.rlim_max == COMPAT_RLIM_INFINITY) - r.rlim_max = RLIM_INFINITY; set_fs(KERNEL_DS); - ret = sys_setrlimit(resource, (struct rlimit __user *) &r); + ret = sys_setrlimit(resource, (struct rlimit __force __user *)&r); set_fs(old_fs); return ret; } @@ -336,19 +364,42 @@ asmlinkage long compat_sys_getrlimit (unsigned int resource, mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - ret = 
sys_getrlimit(resource, (struct rlimit __user *) &r); + ret = sys_getrlimit(resource, (struct rlimit __force __user *)&r); set_fs(old_fs); - if (!ret) { - if (r.rlim_cur > COMPAT_RLIM_INFINITY) - r.rlim_cur = COMPAT_RLIM_INFINITY; - if (r.rlim_max > COMPAT_RLIM_INFINITY) - r.rlim_max = COMPAT_RLIM_INFINITY; + if (!ret) + ret = put_compat_rlimit(&r, rlim); + return ret; +} - if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) || - __put_user(r.rlim_cur, &rlim->rlim_cur) || - __put_user(r.rlim_max, &rlim->rlim_max)) - return -EFAULT; - } +asmlinkage long compat_sys_setprlimit(pid_t pid, unsigned int resource, + struct compat_rlimit __user *rlim) +{ + mm_segment_t old_fs = get_fs(); + struct rlimit r; + int ret; + + ret = get_compat_rlimit(&r, rlim); + if (ret) + return ret; + + set_fs(KERNEL_DS); + ret = sys_setprlimit(pid, resource, (struct rlimit __force __user *)&r); + set_fs(old_fs); + return ret; +} + +asmlinkage long compat_sys_getprlimit(pid_t pid, unsigned int resource, + struct compat_rlimit __user *rlim) +{ + mm_segment_t old_fs = get_fs(); + struct rlimit r; + int ret; + + set_fs(KERNEL_DS); + ret = sys_getprlimit(pid, resource, (struct rlimit __force __user *)&r); + set_fs(old_fs); + if (!ret) + ret = put_compat_rlimit(&r, rlim); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index 1415dc4598ae..f3885e71bf0c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -824,6 +824,8 @@ void __cleanup_sighand(struct sighand_struct *sighand) */ static void posix_cpu_timers_init_group(struct signal_struct *sig) { + unsigned long cpu_limit; + /* Thread group counters. 
*/ thread_group_cputime_init(sig); @@ -838,9 +840,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) sig->cputime_expires.virt_exp = cputime_zero; sig->cputime_expires.sched_exp = 0; - if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { - sig->cputime_expires.prof_exp = - secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); + cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); + if (cpu_limit != RLIM_INFINITY) { + sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); sig->cputimer.running = 1; } @@ -1033,7 +1035,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= - p->signal->rlim[RLIMIT_NPROC].rlim_cur) { + task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 82224c19342e..309cec534dd8 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2457,7 +2457,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (user_locked > user_lock_limit) extra = user_locked - user_lock_limit; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; locked = vma->vm_mm->locked_vm + extra; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 438ff4523513..5bd038c313e9 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -13,16 +13,16 @@ /* * Called after updating RLIMIT_CPU to set timer expiration if necessary. 
*/ -void update_rlimit_cpu(unsigned long rlim_new) +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { cputime_t cputime = secs_to_cputime(rlim_new); - struct signal_struct *const sig = current->signal; + struct signal_struct *const sig = task->signal; if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { - spin_lock_irq(&current->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); - spin_unlock_irq(&current->sighand->siglock); + spin_lock_irq(&task->sighand->siglock); + set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(&task->sighand->siglock); } } @@ -639,7 +639,7 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) if (expires_le(sig->it[CPUCLOCK_PROF].expires, exp->cpu)) break; - i = sig->rlim[RLIMIT_CPU].rlim_cur; + i = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); if (i != RLIM_INFINITY && i <= cputime_to_secs(exp->cpu)) break; @@ -982,6 +982,7 @@ static void check_thread_timers(struct task_struct *tsk, int maxfire; struct list_head *timers = tsk->cpu_timers; struct signal_struct *const sig = tsk->signal; + unsigned long soft; maxfire = 20; tsk->cputime_expires.prof_exp = cputime_zero; @@ -1030,9 +1031,10 @@ static void check_thread_timers(struct task_struct *tsk, /* * Check for the special case thread timers. */ - if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) { - unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max; - unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur; + soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur); + if (soft != RLIM_INFINITY) { + unsigned long hard = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME]. 
+ rlim_max); if (hard != RLIM_INFINITY && tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { @@ -1043,14 +1045,13 @@ static void check_thread_timers(struct task_struct *tsk, __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } - if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) { + if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { /* * At the soft limit, send a SIGXCPU every second. */ - if (sig->rlim[RLIMIT_RTTIME].rlim_cur - < sig->rlim[RLIMIT_RTTIME].rlim_max) { - sig->rlim[RLIMIT_RTTIME].rlim_cur += - USEC_PER_SEC; + if (soft < hard) { + soft += USEC_PER_SEC; + sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; } printk(KERN_INFO "RT Watchdog Timeout: %s[%d]\n", @@ -1121,13 +1122,14 @@ static void check_process_timers(struct task_struct *tsk, unsigned long long sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; struct task_cputime cputime; + unsigned long cpu_cur_lim = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); /* * Don't sample the current process CPU clocks if there are no timers. */ if (list_empty(&timers[CPUCLOCK_PROF]) && cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && - sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && + cpu_cur_lim == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { @@ -1194,10 +1196,12 @@ static void check_process_timers(struct task_struct *tsk, check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, SIGVTALRM); - if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { + if (cpu_cur_lim != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime); + unsigned long hard = + ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max); cputime_t x; - if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) { + if (psecs >= hard) { /* * At the hard limit, we just die. * No need to calculate anything else now. 
@@ -1205,17 +1209,17 @@ static void check_process_timers(struct task_struct *tsk, __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } - if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) { + if (psecs >= cpu_cur_lim) { /* * At the soft limit, send a SIGXCPU every second. */ __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - if (sig->rlim[RLIMIT_CPU].rlim_cur - < sig->rlim[RLIMIT_CPU].rlim_max) { - sig->rlim[RLIMIT_CPU].rlim_cur++; + if (cpu_cur_lim < hard) { + cpu_cur_lim++; + sig->rlim[RLIMIT_CPU].rlim_cur = cpu_cur_lim; } } - x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); + x = secs_to_cputime(cpu_cur_lim); if (cputime_eq(prof_expires, cputime_zero) || cputime_lt(x, prof_expires)) { prof_expires = x; @@ -1382,7 +1386,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) return 1; } - return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; + return ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur) != RLIM_INFINITY; } /* @@ -1480,7 +1484,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, * If the RLIMIT_CPU timer will expire before the * ITIMER_PROF timer, we have nothing else to do. 
*/ - if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur + if (task_rlimit(tsk, RLIMIT_CPU) < cputime_to_secs(*newval)) return; } diff --git a/kernel/sched.c b/kernel/sched.c index e7f2cfa6a257..11d7a009cff0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6118,7 +6118,7 @@ int can_nice(const struct task_struct *p, const int nice) /* convert nice value [19,-20] to rlimit style value [1,40] */ int nice_rlim = 20 - nice; - return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || + return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || capable(CAP_SYS_NICE)); } @@ -6295,7 +6295,7 @@ recheck: if (!lock_task_sighand(p, &flags)) return -ESRCH; - rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur; + rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); unlock_task_sighand(p, &flags); /* can't set/change the rt policy */ diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 5c5fef378415..b065ada8e8d2 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1670,8 +1670,9 @@ static void watchdog(struct rq *rq, struct task_struct *p) if (!p->signal) return; - soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur; - hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max; + /* max may change after cur was read, this will be fixed next tick */ + soft = task_rlimit(p, RLIMIT_RTTIME); + hard = task_rlimit_max(p, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { unsigned long next; diff --git a/kernel/signal.c b/kernel/signal.c index 6b982f2cf524..e16f3a04ea1d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -228,7 +228,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi if (override_rlimit || atomic_read(&user->sigpending) <= - t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) { + task_rlimit(t, RLIMIT_SIGPENDING)) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); diff --git a/kernel/sys.c b/kernel/sys.c index 585d6cd10040..98be1a2d7c55 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -572,8 +572,7 @@ static int set_user(struct cred 
*new) return -EINVAL; } - if (atomic_read(&new_user->processes) >= - current->signal->rlim[RLIMIT_NPROC].rlim_cur && + if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && new_user != INIT_USER) { free_uid(new_user); return -EAGAIN; @@ -1212,6 +1211,61 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) } } +static int check_prlimit_permission(struct task_struct *task) +{ + const struct cred *cred = current_cred(), *tcred; + int ret = 0; + + rcu_read_lock(); + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_RESOURCE)) { + ret = -EPERM; + } + rcu_read_unlock(); + return ret; +} + +SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource, + struct rlimit __user *, rlim) +{ + struct rlimit val; + struct task_struct *tsk; + int ret; + + if (resource >= RLIM_NLIMITS) + return -EINVAL; + + read_lock(&tasklist_lock); + + tsk = find_task_by_vpid(pid); + if (!tsk || !tsk->sighand) { + ret = -ESRCH; + goto err_unlock; + } + + ret = check_prlimit_permission(tsk); + if (ret) + goto err_unlock; + + task_lock(tsk->group_leader); + val = tsk->signal->rlim[resource]; + task_unlock(tsk->group_leader); + + read_unlock(&tasklist_lock); + + return copy_to_user(rlim, &val, sizeof(*rlim)) ? 
-EFAULT : 0; +err_unlock: + read_unlock(&tasklist_lock); + return ret; +} + + #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT /* @@ -1237,43 +1291,50 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, #endif -SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) +/* make sure you are allowed to change @tsk limits before calling this */ +int do_setrlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim) { - struct rlimit new_rlim, *old_rlim; - int retval; + struct rlimit *old_rlim; + int retval = 0; - if (resource >= RLIM_NLIMITS) + if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; - if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) - return -EFAULT; - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; - old_rlim = current->signal->rlim + resource; - if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) - return -EPERM; - if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) + if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open) return -EPERM; - retval = security_task_setrlimit(resource, &new_rlim); - if (retval) - return retval; + /* optimization: 'current' doesn't need locking, e.g. setrlimit */ + if (tsk != current) { + /* protect tsk->signal and tsk->sighand from disappearing */ + read_lock(&tasklist_lock); + if (!tsk->sighand) { + retval = -ESRCH; + goto out; + } + } - if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) { + if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { /* * The caller is asking for an immediate RLIMIT_CPU * expiry. But we use the zero value to mean "it was * never set". 
So let's cheat and make it one second * instead */ - new_rlim.rlim_cur = 1; + new_rlim->rlim_cur = 1; } - task_lock(current->group_leader); - *old_rlim = new_rlim; - task_unlock(current->group_leader); - - if (resource != RLIMIT_CPU) + old_rlim = tsk->signal->rlim + resource; + task_lock(tsk->group_leader); + if ((new_rlim->rlim_max > old_rlim->rlim_max) && + !capable(CAP_SYS_RESOURCE)) + retval = -EPERM; + if (!retval) + retval = security_task_setrlimit(tsk, resource, new_rlim); + if (!retval) + *old_rlim = *new_rlim; + task_unlock(tsk->group_leader); + + if (retval || resource != RLIMIT_CPU) goto out; /* @@ -1282,12 +1343,56 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) * very long-standing error, and fixing it now risks breakage of * applications, so we live with it */ - if (new_rlim.rlim_cur == RLIM_INFINITY) + if (new_rlim->rlim_cur == RLIM_INFINITY) goto out; - update_rlimit_cpu(new_rlim.rlim_cur); + update_rlimit_cpu(tsk, new_rlim->rlim_cur); out: - return 0; + if (tsk != current) + read_unlock(&tasklist_lock); + return retval; +} + +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) +{ + struct rlimit new_rlim; + + if (resource >= RLIM_NLIMITS) + return -EINVAL; + if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + return -EFAULT; + return do_setrlimit(current, resource, &new_rlim); +} + +SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource, + struct rlimit __user *, rlim) +{ + struct task_struct *tsk; + struct rlimit new_rlim; + int ret; + + if (resource >= RLIM_NLIMITS) + return -EINVAL; + + if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + return -EFAULT; + + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + get_task_struct(tsk); + rcu_read_unlock(); + + ret = check_prlimit_permission(tsk); + if (!ret) + ret = do_setrlimit(tsk, resource, &new_rlim); + + put_task_struct(tsk); + + return ret; } /* diff --git 
a/mm/filemap.c b/mm/filemap.c index 8b4d88f9249e..87dbcb3462be 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1950,7 +1950,7 @@ EXPORT_SYMBOL(iov_iter_single_seg_count); inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk) { struct inode *inode = file->f_mapping->host; - unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + unsigned long limit = rlimit(RLIMIT_FSIZE); if (unlikely(*pos < 0)) return -EINVAL; diff --git a/mm/mlock.c b/mm/mlock.c index bd6f0e466f6c..2efb17db69fe 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -25,7 +25,7 @@ int can_do_mlock(void) { if (capable(CAP_IPC_LOCK)) return 1; - if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0) + if (rlimit(RLIMIT_MEMLOCK) != 0) return 1; return 0; } @@ -490,7 +490,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) locked = len >> PAGE_SHIFT; locked += current->mm->locked_vm; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; /* check against resource limits */ @@ -553,7 +553,7 @@ SYSCALL_DEFINE1(mlockall, int, flags) down_write(&current->mm->mmap_sem); - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; @@ -587,7 +587,7 @@ int user_shm_lock(size_t size, struct user_struct *user) int allowed = 0; locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); if (lock_limit == RLIM_INFINITY) allowed = 1; lock_limit >>= PAGE_SHIFT; @@ -621,12 +621,12 @@ int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, down_write(&mm->mmap_sem); - lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT; vm = mm->total_vm + pgsz; if (lim < vm) goto out; - lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + lim = 
ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT; vm = mm->locked_vm + pgsz; if (lim < vm) goto out; diff --git a/mm/mmap.c b/mm/mmap.c index 292ddc3cef9c..aee8df8e058d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -265,7 +265,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) * segment grow beyond its set limit the in case where the limit is * not page aligned -Ram Gupta */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; + rlim = rlimit(RLIMIT_DATA); if (rlim < RLIM_INFINITY && (brk - mm->start_brk) + (mm->end_data - mm->start_data) > rlim) goto out; @@ -989,7 +989,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long locked, lock_limit; locked = len >> PAGE_SHIFT; locked += mm->locked_vm; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; @@ -1561,7 +1561,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns return -ENOMEM; /* Stack limit test */ - if (size > rlim[RLIMIT_STACK].rlim_cur) + if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM; /* mlock limit tests */ @@ -1569,7 +1569,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns unsigned long locked; unsigned long limit; locked = mm->locked_vm + grow; - limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur); + limit >>= PAGE_SHIFT; if (locked > limit && !capable(CAP_IPC_LOCK)) return -ENOMEM; } @@ -2022,7 +2023,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) unsigned long locked, lock_limit; locked = len >> PAGE_SHIFT; locked += mm->locked_vm; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; @@ -2236,7 +2237,7 @@ int may_expand_vm(struct 
mm_struct *mm, unsigned long npages) unsigned long cur = mm->total_vm; /* pages */ unsigned long lim; - lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT; if (cur + npages > lim) return 0; diff --git a/mm/mremap.c b/mm/mremap.c index 97bff2547719..d6740a131998 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -358,7 +358,7 @@ unsigned long do_mremap(unsigned long addr, if (vma->vm_flags & VM_LOCKED) { unsigned long locked, lock_limit; locked = mm->locked_vm << PAGE_SHIFT; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit = rlimit(RLIMIT_MEMLOCK); locked += new_len - old_len; ret = -EAGAIN; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) diff --git a/security/capability.c b/security/capability.c index 5c700e1a4fd3..10f23a4c84e5 100644 --- a/security/capability.c +++ b/security/capability.c @@ -466,7 +466,8 @@ static int cap_task_getioprio(struct task_struct *p) return 0; } -static int cap_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) +static int cap_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) { return 0; } diff --git a/security/security.c b/security/security.c index f2d8aa949323..1f163d6e917f 100644 --- a/security/security.c +++ b/security/security.c @@ -826,9 +826,10 @@ int security_task_getioprio(struct task_struct *p) return security_ops->task_getioprio(p); } -int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) +int security_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) { - return security_ops->task_setrlimit(resource, new_rlim); + return security_ops->task_setrlimit(p, resource, new_rlim); } int security_task_setscheduler(struct task_struct *p, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7a374c2eb043..9688ccc73c00 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2365,7 +2365,8 @@ static void 
selinux_bprm_committing_creds(struct linux_binprm *bprm) initrlim = init_task.signal->rlim + i; rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - update_rlimit_cpu(rlim->rlim_cur); + update_rlimit_cpu(current, + current->signal->rlim[RLIMIT_CPU].rlim_cur); } } @@ -3398,16 +3399,17 @@ static int selinux_task_getioprio(struct task_struct *p) return current_has_perm(p, PROCESS__GETSCHED); } -static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) +static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) { - struct rlimit *old_rlim = current->signal->rlim + resource; + struct rlimit *old_rlim = p->signal->rlim + resource; /* Control the ability to change the hard limit (whether lowering or raising it), so that the hard limit can later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return current_has_perm(current, PROCESS__SETRLIMIT); + return current_has_perm(p, PROCESS__SETRLIMIT); return 0; } |