summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-11-28 11:07:12 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2014-11-28 11:07:12 +1100
commit82851b685662aea5494afd6ff9ab2a4a11ce4ce2 (patch)
treea14b87c26c0be7a79f5dcfe42b5d3ace348c9b29
parentd6c98168d8a4cc592f5e8ee8b2612f21824f01fb (diff)
parente56da345eb529370f21f36a52bbb114ba38ad921 (diff)
Merge remote-tracking branch 's390/features'
-rw-r--r--Documentation/s390/Debugging390.txt462
-rw-r--r--arch/s390/include/asm/cmpxchg.h240
-rw-r--r--arch/s390/include/asm/ftrace.h52
-rw-r--r--arch/s390/include/asm/io.h9
-rw-r--r--arch/s390/include/asm/irq.h11
-rw-r--r--arch/s390/include/asm/kprobes.h1
-rw-r--r--arch/s390/include/asm/lowcore.h4
-rw-r--r--arch/s390/include/asm/pci.h5
-rw-r--r--arch/s390/include/asm/pci_io.h6
-rw-r--r--arch/s390/include/asm/pgalloc.h2
-rw-r--r--arch/s390/include/asm/pgtable.h33
-rw-r--r--arch/s390/include/asm/spinlock.h9
-rw-r--r--arch/s390/include/uapi/asm/unistd.h4
-rw-r--r--arch/s390/kernel/asm-offsets.c5
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/compat_wrapper.c2
-rw-r--r--arch/s390/kernel/dumpstack.c3
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/ftrace.c136
-rw-r--r--arch/s390/kernel/idle.c3
-rw-r--r--arch/s390/kernel/irq.c5
-rw-r--r--arch/s390/kernel/kprobes.c178
-rw-r--r--arch/s390/kernel/mcount.S1
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c1
-rw-r--r--arch/s390/kernel/process.c2
-rw-r--r--arch/s390/kernel/ptrace.c97
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/signal.c2
-rw-r--r--arch/s390/kernel/smp.c1
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kernel/time.c3
-rw-r--r--arch/s390/kernel/traps.c25
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/kvm/priv.c17
-rw-r--r--arch/s390/mm/fault.c10
-rw-r--r--arch/s390/mm/pgtable.c185
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c9
-rw-r--r--arch/s390/pci/pci_clp.c1
-rw-r--r--arch/s390/pci/pci_mmio.c115
-rw-r--r--include/asm-generic/pgtable.h11
-rw-r--r--include/linux/kprobes.h1
-rw-r--r--include/linux/mm.h11
-rw-r--r--kernel/kprobes.c18
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--mm/huge_memory.c5
-rw-r--r--mm/memory.c2
-rw-r--r--scripts/recordmcount.c2
-rwxr-xr-xscripts/recordmcount.pl2
50 files changed, 761 insertions, 948 deletions
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 462321c1aeea..08911b5c6b0e 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -26,11 +26,6 @@ The Linux for s/390 & z/Architecture Kernel Task Structure
Register Usage & Stackframes on Linux for s/390 & z/Architecture
A sample program with comments
Compiling programs for debugging on Linux for s/390 & z/Architecture
-Figuring out gcc compile errors
-Debugging Tools
-objdump
-strace
-Performance Debugging
Debugging under VM
s/390 & z/Architecture IO Overview
Debugging IO on s/390 & z/Architecture under VM
@@ -114,28 +109,25 @@ s/390 z/Architecture
16-17 16-17 Address Space Control
- 00 Primary Space Mode when DAT on
- The linux kernel currently runs in this mode, CR1 is affiliated with
- this mode & points to the primary segment table origin etc.
-
- 01 Access register mode this mode is used in functions to
- copy data between kernel & user space.
-
- 10 Secondary space mode not used in linux however CR7 the
- register affiliated with this mode is & this & normally
- CR13=CR7 to allow us to copy data between kernel & user space.
- We do this as follows:
- We set ar2 to 0 to designate its
- affiliated gpr ( gpr2 )to point to primary=kernel space.
- We set ar4 to 1 to designate its
- affiliated gpr ( gpr4 ) to point to secondary=home=user space
- & then essentially do a memcopy(gpr2,gpr4,size) to
- copy data between the address spaces, the reason we use home space for the
- kernel & don't keep secondary space free is that code will not run in
- secondary space.
-
- 11 Home Space Mode all user programs run in this mode.
- it is affiliated with CR13.
+ 00 Primary Space Mode:
+ The register CR1 contains the primary address-space control ele-
+ ment (PASCE), which points to the primary space region/segment
+ table origin.
+
+ 01 Access register mode
+
+ 10 Secondary Space Mode:
+ The register CR7 contains the secondary address-space control
+ element (SASCE), which points to the secondary space region or
+ segment table origin.
+
+ 11 Home Space Mode:
+ The register CR13 contains the home space address-space control
+ element (HASCE), which points to the home space region/segment
+ table origin.
+
+ See "Address Spaces on Linux for s/390 & z/Architecture" below
+ for more information about address space usage in Linux.
18-19 18-19 Condition codes (CC)
@@ -249,9 +241,9 @@ currently 4TB of physical memory currently on z/Architecture.
Address Spaces on Linux for s/390 & z/Architecture
==================================================
-Our addressing scheme is as follows
-
+Our addressing scheme is basically as follows:
+ Primary Space Home Space
Himem 0x7fffffff 2GB on s/390 ***************** ****************
currently 0x3ffffffffff (2^42)-1 * User Stack * * *
on z/Architecture. ***************** * *
@@ -264,9 +256,46 @@ on z/Architecture. ***************** * *
* Sections * * *
0x00000000 ***************** ****************
-This also means that we need to look at the PSW problem state bit
-or the addressing mode to decide whether we are looking at
-user or kernel space.
+This also means that we need to look at the PSW problem state bit and the
+addressing mode to decide whether we are looking at user or kernel space.
+
+User space runs in primary address mode (or access register mode within
+the vdso code).
+
+The kernel usually also runs in home space mode, however when accessing
+user space the kernel switches to primary or secondary address mode if
+the mvcos instruction is not available or if a compare-and-swap (futex)
+instruction on a user space address is performed.
+
+When also looking at the ASCE control registers, this means:
+
+User space:
+- runs in primary or access register mode
+- cr1 contains the user asce
+- cr7 contains the user asce
+- cr13 contains the kernel asce
+
+Kernel space:
+- runs in home space mode
+- cr1 contains the user or kernel asce
+ -> the kernel asce is loaded when a uaccess requires primary or
+ secondary address mode
+- cr7 contains the user or kernel asce, (changed with set_fs())
+- cr13 contains the kernel asce
+
+In case of uaccess the kernel changes to:
+- primary space mode in case of a uaccess (copy_to_user) and uses
+ e.g. the mvcp instruction to access user space. However the kernel
+ will stay in home space mode if the mvcos instruction is available
+- secondary space mode in case of futex atomic operations, so that the
+ instructions come from primary address space and data from secondary
+ space
+
+In case of KVM, the kernel runs in home space mode, but cr1 gets switched
+to contain the gmap asce before the SIE instruction gets executed. When
+the SIE instruction is finished, cr1 will be switched back to contain the
+user asce.
+
Virtual Addresses on s/390 & z/Architecture
===========================================
@@ -706,376 +735,7 @@ Debugging with optimisation has since much improved after fixing
some bugs, please make sure you are using gdb-5.0 or later developed
after Nov'2000.
-Figuring out gcc compile errors
-===============================
-If you are getting a lot of syntax errors compiling a program & the problem
-isn't blatantly obvious from the source.
-It often helps to just preprocess the file, this is done with the -E
-option in gcc.
-What this does is that it runs through the very first phase of compilation
-( compilation in gcc is done in several stages & gcc calls many programs to
-achieve its end result ) with the -E option gcc just calls the gcc preprocessor (cpp).
-The c preprocessor does the following, it joins all the files #included together
-recursively ( #include files can #include other files ) & also the c file you wish to compile.
-It puts a fully qualified path of the #included files in a comment & it
-does macro expansion.
-This is useful for debugging because
-1) You can double check whether the files you expect to be included are the ones
-that are being included ( e.g. double check that you aren't going to the i386 asm directory ).
-2) Check that macro definitions aren't clashing with typedefs,
-3) Check that definitions aren't being used before they are being included.
-4) Helps put the line emitting the error under the microscope if it contains macros.
-
-For convenience the Linux kernel's makefile will do preprocessing automatically for you
-by suffixing the file you want built with .i ( instead of .o )
-
-e.g.
-from the linux directory type
-make arch/s390/kernel/signal.i
-this will build
-
-s390-gcc -D__KERNEL__ -I/home1/barrow/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
--fno-strict-aliasing -D__SMP__ -pipe -fno-strength-reduce -E arch/s390/kernel/signal.c
-> arch/s390/kernel/signal.i
-
-Now look at signal.i you should see something like.
-
-
-# 1 "/home1/barrow/linux/include/asm/types.h" 1
-typedef unsigned short umode_t;
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-If instead you are getting errors further down e.g.
-unknown instruction:2515 "move.l" or better still unknown instruction:2515
-"Fixme not implemented yet, call Martin" you are probably are attempting to compile some code
-meant for another architecture or code that is simply not implemented, with a fixme statement
-stuck into the inline assembly code so that the author of the file now knows he has work to do.
-To look at the assembly emitted by gcc just before it is about to call gas ( the gnu assembler )
-use the -S option.
-Again for your convenience the Linux kernel's Makefile will hold your hand &
-do all this donkey work for you also by building the file with the .s suffix.
-e.g.
-from the Linux directory type
-make arch/s390/kernel/signal.s
-
-s390-gcc -D__KERNEL__ -I/home1/barrow/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
--fno-strict-aliasing -D__SMP__ -pipe -fno-strength-reduce -S arch/s390/kernel/signal.c
--o arch/s390/kernel/signal.s
-
-
-This will output something like, ( please note the constant pool & the useful comments
-in the prologue to give you a hand at interpreting it ).
-
-.LC54:
- .string "misaligned (__u16 *) in __xchg\n"
-.LC57:
- .string "misaligned (__u32 *) in __xchg\n"
-.L$PG1: # Pool sys_sigsuspend
-.LC192:
- .long -262401
-.LC193:
- .long -1
-.LC194:
- .long schedule-.L$PG1
-.LC195:
- .long do_signal-.L$PG1
- .align 4
-.globl sys_sigsuspend
- .type sys_sigsuspend,@function
-sys_sigsuspend:
-# leaf function 0
-# automatics 16
-# outgoing args 0
-# need frame pointer 0
-# call alloca 0
-# has varargs 0
-# incoming args (stack) 0
-# function length 168
- STM 8,15,32(15)
- LR 0,15
- AHI 15,-112
- BASR 13,0
-.L$CO1: AHI 13,.L$PG1-.L$CO1
- ST 0,0(15)
- LR 8,2
- N 5,.LC192-.L$PG1(13)
-
-Adding -g to the above output makes the output even more useful
-e.g. typing
-make CC:="s390-gcc -g" kernel/sched.s
-
-which compiles.
-s390-gcc -g -D__KERNEL__ -I/home/barrow/linux-2.3/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strict-aliasing -pipe -fno-strength-reduce -S kernel/sched.c -o kernel/sched.s
-
-also outputs stabs ( debugger ) info, from this info you can find out the
-offsets & sizes of various elements in structures.
-e.g. the stab for the structure
-struct rlimit {
- unsigned long rlim_cur;
- unsigned long rlim_max;
-};
-is
-.stabs "rlimit:T(151,2)=s8rlim_cur:(0,5),0,32;rlim_max:(0,5),32,32;;",128,0,0,0
-from this stab you can see that
-rlimit_cur starts at bit offset 0 & is 32 bits in size
-rlimit_max starts at bit offset 32 & is 32 bits in size.
-
-
-Debugging Tools:
-================
-
-objdump
-=======
-This is a tool with many options the most useful being ( if compiled with -g).
-objdump --source <victim program or object file> > <victims debug listing >
-
-
-The whole kernel can be compiled like this ( Doing this will make a 17MB kernel
-& a 200 MB listing ) however you have to strip it before building the image
-using the strip command to make it a more reasonable size to boot it.
-
-A source/assembly mixed dump of the kernel can be done with the line
-objdump --source vmlinux > vmlinux.lst
-Also, if the file isn't compiled -g, this will output as much debugging information
-as it can (e.g. function names). This is very slow as it spends lots
-of time searching for debugging info. The following self explanatory line should be used
-instead if the code isn't compiled -g, as it is much faster:
-objdump --disassemble-all --syms vmlinux > vmlinux.lst
-
-As hard drive space is valuable most of us use the following approach.
-1) Look at the emitted psw on the console to find the crash address in the kernel.
-2) Look at the file System.map ( in the linux directory ) produced when building
-the kernel to find the closest address less than the current PSW to find the
-offending function.
-3) use grep or similar to search the source tree looking for the source file
- with this function if you don't know where it is.
-4) rebuild this object file with -g on, as an example suppose the file was
-( /arch/s390/kernel/signal.o )
-5) Assuming the file with the erroneous function is signal.c Move to the base of the
-Linux source tree.
-6) rm /arch/s390/kernel/signal.o
-7) make /arch/s390/kernel/signal.o
-8) watch the gcc command line emitted
-9) type it in again or alternatively cut & paste it on the console adding the -g option.
-10) objdump --source arch/s390/kernel/signal.o > signal.lst
-This will output the source & the assembly intermixed, as the snippet below shows
-This will unfortunately output addresses which aren't the same
-as the kernel ones you should be able to get around the mental arithmetic
-by playing with the --adjust-vma parameter to objdump.
-
-
-
-
-static inline void spin_lock(spinlock_t *lp)
-{
- a0: 18 34 lr %r3,%r4
- a2: a7 3a 03 bc ahi %r3,956
- __asm__ __volatile(" lhi 1,-1\n"
- a6: a7 18 ff ff lhi %r1,-1
- aa: 1f 00 slr %r0,%r0
- ac: ba 01 30 00 cs %r0,%r1,0(%r3)
- b0: a7 44 ff fd jm aa <sys_sigsuspend+0x2e>
- saveset = current->blocked;
- b4: d2 07 f0 68 mvc 104(8,%r15),972(%r4)
- b8: 43 cc
- return (set->sig[0] & mask) != 0;
-}
-
-6) If debugging under VM go down to that section in the document for more info.
-
-
-I now have a tool which takes the pain out of --adjust-vma
-& you are able to do something like
-make /arch/s390/kernel/traps.lst
-& it automatically generates the correctly relocated entries for
-the text segment in traps.lst.
-This tool is now standard in linux distro's in scripts/makelst
-
-strace:
--------
-Q. What is it ?
-A. It is a tool for intercepting calls to the kernel & logging them
-to a file & on the screen.
-
-Q. What use is it ?
-A. You can use it to find out what files a particular program opens.
-
-
-Example 1
----------
-If you wanted to know does ping work but didn't have the source
-strace ping -c 1 127.0.0.1
-& then look at the man pages for each of the syscalls below,
-( In fact this is sometimes easier than looking at some spaghetti
-source which conditionally compiles for several architectures ).
-Not everything that it throws out needs to make sense immediately.
-
-Just looking quickly you can see that it is making up a RAW socket
-for the ICMP protocol.
-Doing an alarm(10) for a 10 second timeout
-& doing a gettimeofday call before & after each read to see
-how long the replies took, & writing some text to stdout so the user
-has an idea what is going on.
-
-socket(PF_INET, SOCK_RAW, IPPROTO_ICMP) = 3
-getuid() = 0
-setuid(0) = 0
-stat("/usr/share/locale/C/libc.cat", 0xbffff134) = -1 ENOENT (No such file or directory)
-stat("/usr/share/locale/libc/C", 0xbffff134) = -1 ENOENT (No such file or directory)
-stat("/usr/local/share/locale/C/libc.cat", 0xbffff134) = -1 ENOENT (No such file or directory)
-getpid() = 353
-setsockopt(3, SOL_SOCKET, SO_BROADCAST, [1], 4) = 0
-setsockopt(3, SOL_SOCKET, SO_RCVBUF, [49152], 4) = 0
-fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(3, 1), ...}) = 0
-mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40008000
-ioctl(1, TCGETS, {B9600 opost isig icanon echo ...}) = 0
-write(1, "PING 127.0.0.1 (127.0.0.1): 56 d"..., 42PING 127.0.0.1 (127.0.0.1): 56 data bytes
-) = 42
-sigaction(SIGINT, {0x8049ba0, [], SA_RESTART}, {SIG_DFL}) = 0
-sigaction(SIGALRM, {0x8049600, [], SA_RESTART}, {SIG_DFL}) = 0
-gettimeofday({948904719, 138951}, NULL) = 0
-sendto(3, "\10\0D\201a\1\0\0\17#\2178\307\36"..., 64, 0, {sin_family=AF_INET,
-sin_port=htons(0), sin_addr=inet_addr("127.0.0.1")}, 16) = 64
-sigaction(SIGALRM, {0x8049600, [], SA_RESTART}, {0x8049600, [], SA_RESTART}) = 0
-sigaction(SIGALRM, {0x8049ba0, [], SA_RESTART}, {0x8049600, [], SA_RESTART}) = 0
-alarm(10) = 0
-recvfrom(3, "E\0\0T\0005\0\0@\1|r\177\0\0\1\177"..., 192, 0,
-{sin_family=AF_INET, sin_port=htons(50882), sin_addr=inet_addr("127.0.0.1")}, [16]) = 84
-gettimeofday({948904719, 160224}, NULL) = 0
-recvfrom(3, "E\0\0T\0006\0\0\377\1\275p\177\0"..., 192, 0,
-{sin_family=AF_INET, sin_port=htons(50882), sin_addr=inet_addr("127.0.0.1")}, [16]) = 84
-gettimeofday({948904719, 166952}, NULL) = 0
-write(1, "64 bytes from 127.0.0.1: icmp_se"...,
-5764 bytes from 127.0.0.1: icmp_seq=0 ttl=255 time=28.0 ms
-
-Example 2
----------
-strace passwd 2>&1 | grep open
-produces the following output
-open("/etc/ld.so.cache", O_RDONLY) = 3
-open("/opt/kde/lib/libc.so.5", O_RDONLY) = -1 ENOENT (No such file or directory)
-open("/lib/libc.so.5", O_RDONLY) = 3
-open("/dev", O_RDONLY) = 3
-open("/var/run/utmp", O_RDONLY) = 3
-open("/etc/passwd", O_RDONLY) = 3
-open("/etc/shadow", O_RDONLY) = 3
-open("/etc/login.defs", O_RDONLY) = 4
-open("/dev/tty", O_RDONLY) = 4
-
-The 2>&1 is done to redirect stderr to stdout & grep is then filtering this input
-through the pipe for each line containing the string open.
-
-
-Example 3
----------
-Getting sophisticated
-telnetd crashes & I don't know why
-
-Steps
------
-1) Replace the following line in /etc/inetd.conf
-telnet stream tcp nowait root /usr/sbin/in.telnetd -h
-with
-telnet stream tcp nowait root /blah
-
-2) Create the file /blah with the following contents to start tracing telnetd
-#!/bin/bash
-/usr/bin/strace -o/t1 -f /usr/sbin/in.telnetd -h
-3) chmod 700 /blah to make it executable only to root
-4)
-killall -HUP inetd
-or ps aux | grep inetd
-get inetd's process id
-& kill -HUP inetd to restart it.
-
-Important options
------------------
--o is used to tell strace to output to a file in our case t1 in the root directory
--f is to follow children i.e.
-e.g in our case above telnetd will start the login process & subsequently a shell like bash.
-You will be able to tell which is which from the process ID's listed on the left hand side
-of the strace output.
--p<pid> will tell strace to attach to a running process, yup this can be done provided
- it isn't being traced or debugged already & you have enough privileges,
-the reason 2 processes cannot trace or debug the same program is that strace
-becomes the parent process of the one being debugged & processes ( unlike people )
-can have only one parent.
-
-
-However the file /t1 will get big quite quickly
-to test it telnet 127.0.0.1
-
-now look at what files in.telnetd execve'd
-413 execve("/usr/sbin/in.telnetd", ["/usr/sbin/in.telnetd", "-h"], [/* 17 vars */]) = 0
-414 execve("/bin/login", ["/bin/login", "-h", "localhost", "-p"], [/* 2 vars */]) = 0
-
-Whey it worked!.
-
-
-Other hints:
-------------
-If the program is not very interactive ( i.e. not much keyboard input )
-& is crashing in one architecture but not in another you can do
-an strace of both programs under as identical a scenario as you can
-on both architectures outputting to a file then.
-do a diff of the two traces using the diff program
-i.e.
-diff output1 output2
-& maybe you'll be able to see where the call paths differed, this
-is possibly near the cause of the crash.
-
-More info
----------
-Look at man pages for strace & the various syscalls
-e.g. man strace, man alarm, man socket.
-
-
-Performance Debugging
-=====================
-gcc is capable of compiling in profiling code just add the -p option
-to the CFLAGS, this obviously affects program size & performance.
-This can be used by the gprof gnu profiling tool or the
-gcov the gnu code coverage tool ( code coverage is a means of testing
-code quality by checking if all the code in an executable in exercised by
-a tester ).
-
-
-Using top to find out where processes are sleeping in the kernel
-----------------------------------------------------------------
-To do this copy the System.map from the root directory where
-the linux kernel was built to the /boot directory on your
-linux machine.
-Start top
-Now type fU<return>
-You should see a new field called WCHAN which
-tells you where each process is sleeping here is a typical output.
-
- 6:59pm up 41 min, 1 user, load average: 0.00, 0.00, 0.00
-28 processes: 27 sleeping, 1 running, 0 zombie, 0 stopped
-CPU states: 0.0% user, 0.1% system, 0.0% nice, 99.8% idle
-Mem: 254900K av, 45976K used, 208924K free, 0K shrd, 28636K buff
-Swap: 0K av, 0K used, 0K free 8620K cached
-
- PID USER PRI NI SIZE RSS SHARE WCHAN STAT LIB %CPU %MEM TIME COMMAND
- 750 root 12 0 848 848 700 do_select S 0 0.1 0.3 0:00 in.telnetd
- 767 root 16 0 1140 1140 964 R 0 0.1 0.4 0:00 top
- 1 root 8 0 212 212 180 do_select S 0 0.0 0.0 0:00 init
- 2 root 9 0 0 0 0 down_inte SW 0 0.0 0.0 0:00 kmcheck
-
-The time command
-----------------
-Another related command is the time command which gives you an indication
-of where a process is spending the majority of its time.
-e.g.
-time ping -c 5 nc
-outputs
-real 0m4.054s
-user 0m0.010s
-sys 0m0.010s
Debugging under VM
==================
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4236408070e5..6259895fcd97 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -11,200 +11,28 @@
#include <linux/types.h>
#include <linux/bug.h>
-extern void __xchg_called_with_bad_pointer(void);
-
-static inline unsigned long __xchg(unsigned long x, void *ptr, int size)
-{
- unsigned long addr, old;
- int shift;
-
- switch (size) {
- case 1:
- addr = (unsigned long) ptr;
- shift = (3 ^ (addr & 3)) << 3;
- addr ^= addr & 3;
- asm volatile(
- " l %0,%4\n"
- "0: lr 0,%0\n"
- " nr 0,%3\n"
- " or 0,%2\n"
- " cs %0,0,%4\n"
- " jl 0b\n"
- : "=&d" (old), "=Q" (*(int *) addr)
- : "d" ((x & 0xff) << shift), "d" (~(0xff << shift)),
- "Q" (*(int *) addr) : "memory", "cc", "0");
- return old >> shift;
- case 2:
- addr = (unsigned long) ptr;
- shift = (2 ^ (addr & 2)) << 3;
- addr ^= addr & 2;
- asm volatile(
- " l %0,%4\n"
- "0: lr 0,%0\n"
- " nr 0,%3\n"
- " or 0,%2\n"
- " cs %0,0,%4\n"
- " jl 0b\n"
- : "=&d" (old), "=Q" (*(int *) addr)
- : "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)),
- "Q" (*(int *) addr) : "memory", "cc", "0");
- return old >> shift;
- case 4:
- asm volatile(
- " l %0,%3\n"
- "0: cs %0,%2,%3\n"
- " jl 0b\n"
- : "=&d" (old), "=Q" (*(int *) ptr)
- : "d" (x), "Q" (*(int *) ptr)
- : "memory", "cc");
- return old;
-#ifdef CONFIG_64BIT
- case 8:
- asm volatile(
- " lg %0,%3\n"
- "0: csg %0,%2,%3\n"
- " jl 0b\n"
- : "=&d" (old), "=m" (*(long *) ptr)
- : "d" (x), "Q" (*(long *) ptr)
- : "memory", "cc");
- return old;
-#endif /* CONFIG_64BIT */
- }
- __xchg_called_with_bad_pointer();
- return x;
-}
-
-#define xchg(ptr, x) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\
- __ret; \
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __o = (o); \
+ __typeof__(*(ptr)) __n = (n); \
+ (__typeof__(*(ptr))) __sync_val_compare_and_swap((ptr),__o,__n);\
})
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#define __HAVE_ARCH_CMPXCHG
-
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long addr, prev, tmp;
- int shift;
-
- switch (size) {
- case 1:
- addr = (unsigned long) ptr;
- shift = (3 ^ (addr & 3)) << 3;
- addr ^= addr & 3;
- asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
- "1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
- : "d" ((old & 0xff) << shift),
- "d" ((new & 0xff) << shift),
- "d" (~(0xff << shift))
- : "memory", "cc");
- return prev >> shift;
- case 2:
- addr = (unsigned long) ptr;
- shift = (2 ^ (addr & 2)) << 3;
- addr ^= addr & 2;
- asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
- "1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
- : "d" ((old & 0xffff) << shift),
- "d" ((new & 0xffff) << shift),
- "d" (~(0xffff << shift))
- : "memory", "cc");
- return prev >> shift;
- case 4:
- asm volatile(
- " cs %0,%3,%1\n"
- : "=&d" (prev), "=Q" (*(int *) ptr)
- : "0" (old), "d" (new), "Q" (*(int *) ptr)
- : "memory", "cc");
- return prev;
-#ifdef CONFIG_64BIT
- case 8:
- asm volatile(
- " csg %0,%3,%1\n"
- : "=&d" (prev), "=Q" (*(long *) ptr)
- : "0" (old), "d" (new), "Q" (*(long *) ptr)
- : "memory", "cc");
- return prev;
-#endif /* CONFIG_64BIT */
- }
- __cmpxchg_called_with_bad_pointer();
- return old;
-}
-
-#define cmpxchg(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
- __ret; \
-})
+#define cmpxchg64 cmpxchg
+#define cmpxchg_local cmpxchg
+#define cmpxchg64_local cmpxchg
-#ifdef CONFIG_64BIT
-#define cmpxchg64(ptr, o, n) \
+#define xchg(ptr, x) \
({ \
- cmpxchg((ptr), (o), (n)); \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(*(ptr)) __old; \
+ do { \
+ __old = *__ptr; \
+ } while (!__sync_bool_compare_and_swap(__ptr, __old, x)); \
+ __old; \
})
-#else /* CONFIG_64BIT */
-static inline unsigned long long __cmpxchg64(void *ptr,
- unsigned long long old,
- unsigned long long new)
-{
- register_pair rp_old = {.pair = old};
- register_pair rp_new = {.pair = new};
- unsigned long long *ullptr = ptr;
- asm volatile(
- " cds %0,%2,%1"
- : "+d" (rp_old), "+Q" (*ullptr)
- : "d" (rp_new)
- : "memory", "cc");
- return rp_old.pair;
-}
-
-#define cmpxchg64(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __cmpxchg64((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)); \
- __ret; \
-})
-#endif /* CONFIG_64BIT */
+#define __HAVE_ARCH_CMPXCHG
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \
@@ -265,40 +93,4 @@ extern void __cmpxchg_double_called_with_bad_pointer(void);
#define system_has_cmpxchg_double() 1
-#include <asm-generic/cmpxchg-local.h>
-
-static inline unsigned long __cmpxchg_local(void *ptr,
- unsigned long old,
- unsigned long new, int size)
-{
- switch (size) {
- case 1:
- case 2:
- case 4:
-#ifdef CONFIG_64BIT
- case 8:
-#endif
- return __cmpxchg(ptr, old, new, size);
- default:
- return __cmpxchg_local_generic(ptr, old, new, size);
- }
-
- return old;
-}
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __ret = (__typeof__(*(ptr))) \
- __cmpxchg_local((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
- __ret; \
-})
-
-#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n))
-
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 3aef8afec336..785041f1dc77 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -1,25 +1,67 @@
#ifndef _ASM_S390_FTRACE_H
#define _ASM_S390_FTRACE_H
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#define MCOUNT_INSN_SIZE 24
+#define MCOUNT_RETURN_FIXUP 18
+
#ifndef __ASSEMBLY__
-extern void _mcount(void);
+void _mcount(void);
+void ftrace_caller(void);
+
extern char ftrace_graph_caller_end;
+extern unsigned long ftrace_plt;
struct dyn_arch_ftrace { };
-#define MCOUNT_ADDR ((long)_mcount)
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
+#define KPROBE_ON_FTRACE_NOP 0
+#define KPROBE_ON_FTRACE_CALL 1
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
-#endif /* __ASSEMBLY__ */
+struct ftrace_insn {
+ u16 opc;
+ s32 disp;
+} __packed;
-#define MCOUNT_INSN_SIZE 18
+static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ /* jg .+24 */
+ insn->opc = 0xc0f4;
+ insn->disp = MCOUNT_INSN_SIZE / 2;
+#endif
+}
-#define ARCH_SUPPORTS_FTRACE_OPS 1
+static inline int is_ftrace_nop(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ if (insn->disp == MCOUNT_INSN_SIZE / 2)
+ return 1;
+#endif
+ return 0;
+}
+
+static inline void ftrace_generate_call_insn(struct ftrace_insn *insn,
+ unsigned long ip)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ unsigned long target;
+ /* brasl r0,ftrace_caller */
+ target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR;
+ insn->opc = 0xc005;
+ insn->disp = (target - ip) / 2;
+#endif
+}
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 6ad9013c67e7..30fd5c84680e 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -39,6 +39,15 @@ static inline void iounmap(volatile void __iomem *addr)
{
}
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+ return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
/*
* s390 needs a private implementation of pci_iomap since ioremap with its
* offset parameter isn't sufficient. That's because BAR spaces are not
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index b0d5f0a97a01..343ea7c987aa 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,11 +1,11 @@
#ifndef _ASM_IRQ_H
#define _ASM_IRQ_H
-#define EXT_INTERRUPT 1
-#define IO_INTERRUPT 2
-#define THIN_INTERRUPT 3
+#define EXT_INTERRUPT 0
+#define IO_INTERRUPT 1
+#define THIN_INTERRUPT 2
-#define NR_IRQS_BASE 4
+#define NR_IRQS_BASE 3
#ifdef CONFIG_PCI_NR_MSI
# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
@@ -13,9 +13,6 @@
# define NR_IRQS NR_IRQS_BASE
#endif
-/* This number is used when no interrupt has been assigned */
-#define NO_IRQ 0
-
/* External interruption codes */
#define EXT_IRQ_INTERRUPT_KEY 0x0040
#define EXT_IRQ_CLK_COMP 0x1004
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 98629173ce3b..b47ad3b642cc 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -60,6 +60,7 @@ typedef u16 kprobe_opcode_t;
struct arch_specific_insn {
/* copy of original instruction */
kprobe_opcode_t *insn;
+ unsigned int is_ftrace_insn : 1;
};
struct prev_kprobe {
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 6cc51fe84410..34fbcac61133 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -147,7 +147,7 @@ struct _lowcore {
__u32 softirq_pending; /* 0x02ec */
__u32 percpu_offset; /* 0x02f0 */
__u32 machine_flags; /* 0x02f4 */
- __u32 ftrace_func; /* 0x02f8 */
+ __u8 pad_0x02f8[0x02fc-0x02f8]; /* 0x02f8 */
__u32 spinlock_lockval; /* 0x02fc */
__u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */
@@ -297,7 +297,7 @@ struct _lowcore {
__u64 percpu_offset; /* 0x0378 */
__u64 vdso_per_cpu_data; /* 0x0380 */
__u64 machine_flags; /* 0x0388 */
- __u64 ftrace_func; /* 0x0390 */
+ __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */
__u64 gmap; /* 0x0398 */
__u32 spinlock_lockval; /* 0x03a0 */
__u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c030900320e0..ef803c202d42 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -50,10 +50,6 @@ struct zpci_fmb {
atomic64_t unmapped_pages;
} __packed __aligned(16);
-#define ZPCI_MSI_VEC_BITS 11
-#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS)
-#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1)
-
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
ZPCI_FN_STATE_STANDBY,
@@ -90,6 +86,7 @@ struct zpci_dev {
/* IRQ stuff */
u64 msi_addr; /* MSI address */
+ unsigned int max_msi; /* maximum number of MSI's */
struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned int aisb; /* number of the summary bit */
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index d194d544d694..f664e96f48c7 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -139,7 +139,8 @@ static inline int zpci_memcpy_fromio(void *dst,
int size, rc = 0;
while (n > 0) {
- size = zpci_get_max_write_size((u64) src, (u64) dst, n, 8);
+ size = zpci_get_max_write_size((u64 __force) src,
+ (u64) dst, n, 8);
req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
rc = zpci_read_single(req, dst, offset, size);
if (rc)
@@ -162,7 +163,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
return -EINVAL;
while (n > 0) {
- size = zpci_get_max_write_size((u64) dst, (u64) src, n, 128);
+ size = zpci_get_max_write_size((u64 __force) dst,
+ (u64) src, n, 128);
req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
if (size > 8) /* main path */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d39a31c3cdf2..e510b9460efa 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -22,8 +22,6 @@ unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
-void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
- bool init_skey);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57c882761dea..5e102422c9ab 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -133,6 +133,18 @@ extern unsigned long MODULES_END;
#define MODULES_LEN (1UL << 31)
#endif
+static inline int is_module_addr(void *addr)
+{
+#ifdef CONFIG_64BIT
+ BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+ if (addr < (void *)MODULES_VADDR)
+ return 0;
+ if (addr > (void *)MODULES_END)
+ return 0;
+#endif
+ return 1;
+}
+
/*
* A 31 bit pagetable entry of S390 has following format:
* | PFRA | | OS |
@@ -479,6 +491,11 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+/*
+ * In the case that a guest uses storage keys
+ * faults should no longer be backed by zero pages
+ */
+#define mm_forbids_zeropage mm_use_skey
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1634,6 +1651,19 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
return pmd;
}
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address,
+ pmd_t *pmdp, int full)
+{
+ pmd_t pmd = *pmdp;
+
+ if (!full)
+ pmdp_flush_lazy(mm, address, pmdp);
+ pmd_clear(pmdp);
+ return pmd;
+}
+
#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
@@ -1746,7 +1776,8 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
-extern void s390_enable_skey(void);
+extern int s390_enable_skey(void);
+extern void s390_reset_cmma(struct mm_struct *mm);
/*
* No page table caches to initialise
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index d6bdf906caa5..0e37cd041241 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -18,14 +18,7 @@ extern int spin_retry;
static inline int
_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
{
- unsigned int old_expected = old;
-
- asm volatile(
- " cs %0,%3,%1"
- : "=d" (old), "=Q" (*lock)
- : "0" (old), "d" (new), "Q" (*lock)
- : "cc", "memory" );
- return old == old_expected;
+ return __sync_bool_compare_and_swap(lock, old, new);
}
/*
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 4197c89c52d4..2b446cf0cc65 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -287,7 +287,9 @@
#define __NR_getrandom 349
#define __NR_memfd_create 350
#define __NR_bpf 351
-#define NR_syscalls 352
+#define __NR_s390_pci_mmio_write 352
+#define __NR_s390_pci_mmio_read 353
+#define NR_syscalls 354
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ef279a136801..e07e91605353 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -17,8 +17,8 @@
* Make sure that the compiler is new enough. We want a compiler that
* is known to work with the "Q" assembler constraint.
*/
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too old; please use version 3.3.3 or newer
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#error Your compiler is too old; please use version 4.3 or newer
#endif
int main(void)
@@ -156,7 +156,6 @@ int main(void)
DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
- DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
BLANK();
DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 009f5eb11125..34d5fa7b01b5 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -434,7 +434,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
/* Signal frames without vectors registers are short ! */
- __u16 __user *svc = (void *) frame + frame_size - 2;
+ __u16 __user *svc = (void __user *) frame + frame_size - 2;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
return -EFAULT;
restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index c4f7a3d655b8..d7fa2f0f1425 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -218,3 +218,5 @@ COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char
COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index acb412442e5e..a99852e96a77 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -191,7 +191,8 @@ void die(struct pt_regs *regs, const char *str)
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
- printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
+ printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+ regs->int_code >> 17, ++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
#endif
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index cef2879edff3..302ac1f7f8e7 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -12,7 +12,6 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ctype.h>
-#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/pfn.h>
@@ -490,8 +489,5 @@ void __init startup_init(void)
detect_machine_facilities();
setup_topology();
sclp_early_detect();
-#ifdef CONFIG_DYNAMIC_FTRACE
- S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
-#endif
lockdep_on();
}
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 0554b9771c9f..8e61393c8275 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,4 +74,6 @@ struct old_sigaction;
long sys_s390_personality(unsigned int personality);
long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index ca1cabb3a96c..b86bb8823f15 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,6 +7,7 @@
* Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
+#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
@@ -15,60 +16,39 @@
#include <linux/kprobes.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
#include "entry.h"
-void mcount_replace_code(void);
-void ftrace_disable_code(void);
-void ftrace_enable_insn(void);
-
/*
* The mcount code looks like this:
* stg %r14,8(%r15) # offset 0
* larl %r1,<&counter> # offset 6
* brasl %r14,_mcount # offset 12
* lg %r14,8(%r15) # offset 18
- * Total length is 24 bytes. The complete mcount block initially gets replaced
- * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop
- * only patch the jg/lg instruction within the block.
- * Note: we do not patch the first instruction to an unconditional branch,
- * since that would break kprobes/jprobes. It is easier to leave the larl
- * instruction in and only modify the second instruction.
+ * Total length is 24 bytes. Only the first instruction will be patched
+ * by ftrace_make_call / ftrace_make_nop.
* The enabled ftrace code block looks like this:
- * larl %r0,.+24 # offset 0
- * > lg %r1,__LC_FTRACE_FUNC # offset 6
- * br %r1 # offset 12
- * brcl 0,0 # offset 14
- * brc 0,0 # offset 20
+ * > brasl %r0,ftrace_caller # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
* The ftrace function gets called with a non-standard C function call ABI
* where r0 contains the return address. It is also expected that the called
* function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to ftrace caller, but need a
+ * trampoline (ftrace_plt), which clobbers also r1.
* The return point of the ftrace function has offset 24, so execution
* continues behind the mcount block.
- * larl %r0,.+24 # offset 0
- * > jg .+18 # offset 6
- * br %r1 # offset 12
- * brcl 0,0 # offset 14
- * brc 0,0 # offset 20
+ * The disabled ftrace code block looks like this:
+ * > jg .+24 # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
* The jg instruction branches to offset 24 to skip as many instructions
* as possible.
*/
-asm(
- " .align 4\n"
- "mcount_replace_code:\n"
- " larl %r0,0f\n"
- "ftrace_disable_code:\n"
- " jg 0f\n"
- " br %r1\n"
- " brcl 0,0\n"
- " brc 0,0\n"
- "0:\n"
- " .align 4\n"
- "ftrace_enable_insn:\n"
- " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n");
-
-#define MCOUNT_BLOCK_SIZE 24
-#define MCOUNT_INSN_OFFSET 6
-#define FTRACE_INSN_SIZE 6
+
+unsigned long ftrace_plt;
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
@@ -79,24 +59,62 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- /* Initial replacement of the whole mcount block */
- if (addr == MCOUNT_ADDR) {
- if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET,
- mcount_replace_code,
- MCOUNT_BLOCK_SIZE))
- return -EPERM;
- return 0;
+ struct ftrace_insn insn;
+ unsigned short op;
+ void *from, *to;
+ size_t size;
+
+ ftrace_generate_nop_insn(&insn);
+ size = sizeof(insn);
+ from = &insn;
+ to = (void *) rec->ip;
+ if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op)))
+ return -EFAULT;
+ /*
+ * If we find a breakpoint instruction, a kprobe has been placed
+ * at the beginning of the function. We write the constant
+ * KPROBE_ON_FTRACE_NOP into the remaining four bytes of the original
+ * instruction so that the kprobes handler can execute a nop, if it
+ * reaches this breakpoint.
+ */
+ if (op == BREAKPOINT_INSTRUCTION) {
+ size -= 2;
+ from += 2;
+ to += 2;
+ insn.disp = KPROBE_ON_FTRACE_NOP;
}
- if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
- MCOUNT_INSN_SIZE))
+ if (probe_kernel_write(to, from, size))
return -EPERM;
return 0;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn,
- FTRACE_INSN_SIZE))
+ struct ftrace_insn insn;
+ unsigned short op;
+ void *from, *to;
+ size_t size;
+
+ ftrace_generate_call_insn(&insn, rec->ip);
+ size = sizeof(insn);
+ from = &insn;
+ to = (void *) rec->ip;
+ if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op)))
+ return -EFAULT;
+ /*
+ * If we find a breakpoint instruction, a kprobe has been placed
+ * at the beginning of the function. We write the constant
+ * KPROBE_ON_FTRACE_CALL into the remaining four bytes of the original
+ * instruction so that the kprobes handler can execute a brasl if it
+ * reaches this breakpoint.
+ */
+ if (op == BREAKPOINT_INSTRUCTION) {
+ size -= 2;
+ from += 2;
+ to += 2;
+ insn.disp = KPROBE_ON_FTRACE_CALL;
+ }
+ if (probe_kernel_write(to, from, size))
return -EPERM;
return 0;
}
@@ -111,13 +129,30 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
+static int __init ftrace_plt_init(void)
+{
+ unsigned int *ip;
+
+ ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
+ if (!ftrace_plt)
+ panic("cannot allocate ftrace plt\n");
+ ip = (unsigned int *) ftrace_plt;
+ ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+ ip[1] = 0x100a0004;
+ ip[2] = 0x07f10000;
+ ip[3] = FTRACE_ADDR >> 32;
+ ip[4] = FTRACE_ADDR & 0xffffffff;
+ set_memory_ro(ftrace_plt, 1);
+ return 0;
+}
+device_initcall(ftrace_plt_init);
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* Hook the return address and push it in the stack of return addresses
* in current thread info.
*/
-unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
- unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
struct ftrace_graph_ent trace;
@@ -137,6 +172,7 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
out:
return parent;
}
+NOKPROBE_SYMBOL(prepare_ftrace_return);
/*
* Patch the kernel code at ftrace_graph_caller location. The instruction
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 7559f1beab29..05fbc2c98faf 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -19,7 +19,7 @@
static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
-void __kprobes enabled_wait(void)
+void enabled_wait(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
unsigned long long idle_time;
@@ -46,6 +46,7 @@ void __kprobes enabled_wait(void)
smp_wmb();
idle->sequence++;
}
+NOKPROBE_SYMBOL(enabled_wait);
static ssize_t show_idle_count(struct device *dev,
struct device_attribute *attr, char *buf)
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 1b8a38ab7861..f238720690f3 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -127,13 +127,10 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
- goto out;
}
if (index < NR_IRQS) {
if (index >= NR_IRQS_BASE)
goto out;
- /* Adjust index to process irqclass_main_desc array entries */
- index--;
seq_printf(p, "%s: ", irqclass_main_desc[index].name);
irq = irqclass_main_desc[index].irq;
for_each_online_cpu(cpu)
@@ -158,7 +155,7 @@ out:
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
- return from < THIN_INTERRUPT ? THIN_INTERRUPT : from;
+ return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
}
/*
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 014d4729b134..ee0396755430 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
+#include <linux/ftrace.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -58,12 +59,23 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = {
.insn_size = MAX_INSN_SIZE,
};
-static void __kprobes copy_instruction(struct kprobe *p)
+static void copy_instruction(struct kprobe *p)
{
+ unsigned long ip = (unsigned long) p->addr;
s64 disp, new_disp;
u64 addr, new_addr;
- memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+ if (ftrace_location(ip) == ip) {
+ /*
+ * If kprobes patches the instruction that is morphed by
+ * ftrace make sure that kprobes always sees the branch
+ * "jg .+24" that skips the mcount block
+ */
+ ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
+ p->ainsn.is_ftrace_insn = 1;
+ } else
+ memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+ p->opcode = p->ainsn.insn[0];
if (!probe_is_insn_relative_long(p->ainsn.insn))
return;
/*
@@ -79,25 +91,14 @@ static void __kprobes copy_instruction(struct kprobe *p)
new_disp = ((addr + (disp * 2)) - new_addr) / 2;
*(s32 *)&p->ainsn.insn[1] = new_disp;
}
+NOKPROBE_SYMBOL(copy_instruction);
static inline int is_kernel_addr(void *addr)
{
return addr < (void *)_end;
}
-static inline int is_module_addr(void *addr)
-{
-#ifdef CONFIG_64BIT
- BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
- if (addr < (void *)MODULES_VADDR)
- return 0;
- if (addr > (void *)MODULES_END)
- return 0;
-#endif
- return 1;
-}
-
-static int __kprobes s390_get_insn_slot(struct kprobe *p)
+static int s390_get_insn_slot(struct kprobe *p)
{
/*
* Get an insn slot that is within the same 2GB area like the original
@@ -111,8 +112,9 @@ static int __kprobes s390_get_insn_slot(struct kprobe *p)
p->ainsn.insn = get_insn_slot();
return p->ainsn.insn ? 0 : -ENOMEM;
}
+NOKPROBE_SYMBOL(s390_get_insn_slot);
-static void __kprobes s390_free_insn_slot(struct kprobe *p)
+static void s390_free_insn_slot(struct kprobe *p)
{
if (!p->ainsn.insn)
return;
@@ -122,8 +124,9 @@ static void __kprobes s390_free_insn_slot(struct kprobe *p)
free_insn_slot(p->ainsn.insn, 0);
p->ainsn.insn = NULL;
}
+NOKPROBE_SYMBOL(s390_free_insn_slot);
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+int arch_prepare_kprobe(struct kprobe *p)
{
if ((unsigned long) p->addr & 0x01)
return -EINVAL;
@@ -132,54 +135,79 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
if (s390_get_insn_slot(p))
return -ENOMEM;
- p->opcode = *p->addr;
copy_instruction(p);
return 0;
}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
-struct ins_replace_args {
- kprobe_opcode_t *ptr;
- kprobe_opcode_t opcode;
+int arch_check_ftrace_location(struct kprobe *p)
+{
+ return 0;
+}
+
+struct swap_insn_args {
+ struct kprobe *p;
+ unsigned int arm_kprobe : 1;
};
-static int __kprobes swap_instruction(void *aref)
+static int swap_instruction(void *data)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long status = kcb->kprobe_status;
- struct ins_replace_args *args = aref;
-
+ struct swap_insn_args *args = data;
+ struct ftrace_insn new_insn, *insn;
+ struct kprobe *p = args->p;
+ size_t len;
+
+ new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+ len = sizeof(new_insn.opc);
+ if (!p->ainsn.is_ftrace_insn)
+ goto skip_ftrace;
+ len = sizeof(new_insn);
+ insn = (struct ftrace_insn *) p->addr;
+ if (args->arm_kprobe) {
+ if (is_ftrace_nop(insn))
+ new_insn.disp = KPROBE_ON_FTRACE_NOP;
+ else
+ new_insn.disp = KPROBE_ON_FTRACE_CALL;
+ } else {
+ ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
+ if (insn->disp == KPROBE_ON_FTRACE_NOP)
+ ftrace_generate_nop_insn(&new_insn);
+ }
+skip_ftrace:
kcb->kprobe_status = KPROBE_SWAP_INST;
- probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode));
+ probe_kernel_write(p->addr, &new_insn, len);
kcb->kprobe_status = status;
return 0;
}
+NOKPROBE_SYMBOL(swap_instruction);
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
{
- struct ins_replace_args args;
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- args.ptr = p->addr;
- args.opcode = BREAKPOINT_INSTRUCTION;
stop_machine(swap_instruction, &args, NULL);
}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
{
- struct ins_replace_args args;
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- args.ptr = p->addr;
- args.opcode = p->opcode;
stop_machine(swap_instruction, &args, NULL);
}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
{
s390_free_insn_slot(p);
}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
-static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
- struct pt_regs *regs,
- unsigned long ip)
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
{
struct per_regs per_kprobe;
@@ -199,10 +227,11 @@ static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
regs->psw.addr = ip | PSW_ADDR_AMODE;
}
+NOKPROBE_SYMBOL(enable_singlestep);
-static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
- struct pt_regs *regs,
- unsigned long ip)
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
{
/* Restore control regs and psw mask, set new psw address */
__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
@@ -210,41 +239,43 @@ static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
regs->psw.mask |= kcb->kprobe_saved_imask;
regs->psw.addr = ip | PSW_ADDR_AMODE;
}
+NOKPROBE_SYMBOL(disable_singlestep);
/*
* Activate a kprobe by storing its pointer to current_kprobe. The
* previous kprobe is stored in kcb->prev_kprobe. A stack of up to
* two kprobes can be active, see KPROBE_REENTER.
*/
-static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
kcb->prev_kprobe.status = kcb->kprobe_status;
__this_cpu_write(current_kprobe, p);
}
+NOKPROBE_SYMBOL(push_kprobe);
/*
* Deactivate a kprobe by backing up to the previous state. If the
* current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
* for any other state prev_kprobe.kp will be NULL.
*/
-static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb)
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
{
__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
}
+NOKPROBE_SYMBOL(pop_kprobe);
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
/* Replace the return addr with trampoline addr */
regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
- struct kprobe *p)
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
@@ -264,8 +295,9 @@ static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
BUG();
}
}
+NOKPROBE_SYMBOL(kprobe_reenter_check);
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+static int kprobe_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
struct kprobe *p;
@@ -339,6 +371,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
preempt_enable_no_resched();
return 0;
}
+NOKPROBE_SYMBOL(kprobe_handler);
/*
* Function return probe trampoline:
@@ -355,8 +388,7 @@ static void __used kretprobe_trampoline_holder(void)
/*
* Called when the probe at kretprobe trampoline is hit
*/
-static int __kprobes trampoline_probe_handler(struct kprobe *p,
- struct pt_regs *regs)
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri;
struct hlist_head *head, empty_rp;
@@ -444,6 +476,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
*/
return 1;
}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
/*
* Called after single-stepping. p->addr is the address of the
@@ -453,12 +486,30 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
int fixup = probe_get_fixup_type(p->ainsn.insn);
+ /* Check if the kprobes location is an enabled ftrace caller */
+ if (p->ainsn.is_ftrace_insn) {
+ struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
+ struct ftrace_insn call_insn;
+
+ ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
+ /*
+ * A kprobe on an enabled ftrace call site actually single
+ * stepped an unconditional branch (ftrace nop equivalent).
+ * Now we need to fixup things and pretend that a brasl r0,...
+ * was executed instead.
+ */
+ if (insn->disp == KPROBE_ON_FTRACE_CALL) {
+ ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
+ regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
+ }
+ }
+
if (fixup & FIXUP_PSW_NORMAL)
ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
@@ -476,8 +527,9 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
disable_singlestep(kcb, regs, ip);
}
+NOKPROBE_SYMBOL(resume_execution);
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+static int post_kprobe_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
struct kprobe *p = kprobe_running();
@@ -504,8 +556,9 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
return 1;
}
+NOKPROBE_SYMBOL(post_kprobe_handler);
-static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
struct kprobe *p = kprobe_running();
@@ -567,8 +620,9 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
}
return 0;
}
+NOKPROBE_SYMBOL(kprobe_trap_handler);
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
int ret;
@@ -579,12 +633,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
return ret;
}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
/*
* Wrapper routine to for handling exceptions.
*/
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct die_args *args = (struct die_args *) data;
struct pt_regs *regs = args->regs;
@@ -616,8 +671,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
return ret;
}
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -635,13 +691,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
return 1;
}
+NOKPROBE_SYMBOL(setjmp_pre_handler);
-void __kprobes jprobe_return(void)
+void jprobe_return(void)
{
asm volatile(".word 0x0002");
}
+NOKPROBE_SYMBOL(jprobe_return);
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long stack;
@@ -655,6 +713,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
preempt_enable_no_resched();
return 1;
}
+NOKPROBE_SYMBOL(longjmp_break_handler);
static struct kprobe trampoline = {
.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
@@ -666,7 +725,8 @@ int __init arch_init_kprobes(void)
return register_kprobe(&trampoline);
}
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
{
return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4300ea374826..b6dfc5bfcb89 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -27,6 +27,7 @@ ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
lgr %r1,%r15
+ aghi %r0,MCOUNT_RETURN_FIXUP
aghi %r15,-STACK_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b878f12a9597..c3f8d157cb0d 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1383,7 +1383,6 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
cpuhw->lsctl.ed = 1;
/* Set in_use flag and store event */
- event->hw.idx = 0; /* only one sampling event per CPU supported */
cpuhw->event = event;
cpuhw->flags |= PMU_F_IN_USE;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index ed84cc224899..4192dfd55ddc 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -61,7 +61,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
-extern void __kprobes kernel_thread_starter(void);
+extern void kernel_thread_starter(void);
/*
* Free current thread data structures etc..
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 99a567b70d16..9eed6bb5c4a9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -248,14 +248,27 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
*/
tmp = 0;
+ } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ tmp = child->thread.fp_regs.fpc;
+ tmp <<= BITS_PER_LONG - 32;
+
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
- /*
- * floating point regs. are stored in the thread structure
+ /*
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
*/
- offset = addr - (addr_t) &dummy->regs.fp_regs;
- tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
- if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
- tmp <<= BITS_PER_LONG - 32;
+ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+ if (child->thread.vxrs)
+ tmp = *(addr_t *)
+ ((addr_t) child->thread.vxrs + 2*offset);
+ else
+#endif
+ tmp = *(addr_t *)
+ ((addr_t) &child->thread.fp_regs.fprs + offset);
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -383,16 +396,29 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
*/
return 0;
+ } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ if ((unsigned int) data != 0 ||
+ test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ return -EINVAL;
+ child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
- * floating point regs. are stored in the thread structure
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
*/
- if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
- if ((unsigned int) data != 0 ||
- test_fp_ctl(data >> (BITS_PER_LONG - 32)))
- return -EINVAL;
- offset = addr - (addr_t) &dummy->regs.fp_regs;
- *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
+ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+ if (child->thread.vxrs)
+ *(addr_t *)((addr_t)
+ child->thread.vxrs + 2*offset) = data;
+ else
+#endif
+ *(addr_t *)((addr_t)
+ &child->thread.fp_regs.fprs + offset) = data;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
@@ -611,12 +637,26 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
*/
tmp = 0;
+ } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ tmp = child->thread.fp_regs.fpc;
+
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
- * floating point regs. are stored in the thread structure
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
*/
- offset = addr - (addr_t) &dummy32->regs.fp_regs;
- tmp = *(__u32 *)((addr_t) &child->thread.fp_regs + offset);
+ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+ if (child->thread.vxrs)
+ tmp = *(__u32 *)
+ ((addr_t) child->thread.vxrs + 2*offset);
+ else
+#endif
+ tmp = *(__u32 *)
+ ((addr_t) &child->thread.fp_regs.fprs + offset);
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
@@ -722,15 +762,28 @@ static int __poke_user_compat(struct task_struct *child,
*/
return 0;
- } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
/*
- * floating point regs. are stored in the thread structure
+ * floating point control reg. is in the thread structure
*/
- if (addr == (addr_t) &dummy32->regs.fp_regs.fpc &&
- test_fp_ctl(tmp))
+ if (test_fp_ctl(tmp))
return -EINVAL;
- offset = addr - (addr_t) &dummy32->regs.fp_regs;
- *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
+ child->thread.fp_regs.fpc = data;
+
+ } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
+ */
+ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+#ifdef CONFIG_64BIT
+ if (child->thread.vxrs)
+ *(__u32 *)((addr_t)
+ child->thread.vxrs + 2*offset) = tmp;
+ else
+#endif
+ *(__u32 *)((addr_t)
+ &child->thread.fp_regs.fprs + offset) = tmp;
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e80d9ff9a56d..4e532c67832f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -41,7 +41,6 @@
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
-#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
@@ -356,7 +355,6 @@ static void __init setup_lowcore(void)
lc->steal_timer = S390_lowcore.steal_timer;
lc->last_update_timer = S390_lowcore.last_update_timer;
lc->last_update_clock = S390_lowcore.last_update_clock;
- lc->ftrace_func = S390_lowcore.ftrace_func;
restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
restart_stack += ASYNC_SIZE;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 0c1a0ff0a558..6a2ac257d98f 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -371,7 +371,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
} else {
/* Signal frame without vector registers are short ! */
- __u16 __user *svc = (void *) frame + frame_size - 2;
+ __u16 __user *svc = (void __user *) frame + frame_size - 2;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
return -EFAULT;
restorer = (unsigned long) svc | PSW_ADDR_AMODE;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6fd9e60101f1..0b499f5cbe19 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -236,7 +236,6 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
- lc->ftrace_func = S390_lowcore.ftrace_func;
lc->user_timer = lc->system_timer = lc->steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
save_access_regs((unsigned int *) lc->access_regs_save_area);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9f7087fd58de..a2987243bc76 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -360,3 +360,5 @@ SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
+SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
+SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 005d665fe4a5..20660dddb2d6 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -61,10 +61,11 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
/*
* Scheduler clock - returns current time in nanosec units.
*/
-unsigned long long notrace __kprobes sched_clock(void)
+unsigned long long notrace sched_clock(void)
{
return tod_to_ns(get_tod_clock_monotonic());
}
+NOKPROBE_SYMBOL(sched_clock);
/*
* Monotonic_clock - returns # of nanoseconds passed since time_init()
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 9ff5ecba26ab..f081cf1157c3 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -49,7 +49,8 @@ static inline void report_user_fault(struct pt_regs *regs, int signr)
return;
if (!printk_ratelimit())
return;
- printk("User process fault: interruption code 0x%X ", regs->int_code);
+ printk("User process fault: interruption code %04x ilc:%d ",
+ regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
printk("\n");
show_regs(regs);
@@ -87,16 +88,16 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
}
}
-static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code,
- char *str)
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
if (notify_die(DIE_TRAP, str, regs, 0,
regs->int_code, si_signo) == NOTIFY_STOP)
return;
do_report_trap(regs, si_signo, si_code, str);
}
+NOKPROBE_SYMBOL(do_trap);
-void __kprobes do_per_trap(struct pt_regs *regs)
+void do_per_trap(struct pt_regs *regs)
{
siginfo_t info;
@@ -111,6 +112,7 @@ void __kprobes do_per_trap(struct pt_regs *regs)
(void __force __user *) current->thread.per_event.address;
force_sig_info(SIGTRAP, &info, current);
}
+NOKPROBE_SYMBOL(do_per_trap);
void default_trap_handler(struct pt_regs *regs)
{
@@ -151,8 +153,6 @@ DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
"privileged operation")
DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
"special operation exception")
-DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN,
- "translation exception")
#ifdef CONFIG_64BIT
DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
@@ -179,7 +179,13 @@ static inline void do_fp_trap(struct pt_regs *regs, int fpc)
do_trap(regs, SIGFPE, si_code, "floating point exception");
}
-void __kprobes illegal_op(struct pt_regs *regs)
+void translation_exception(struct pt_regs *regs)
+{
+ /* May never happen. */
+ die(regs, "Translation exception");
+}
+
+void illegal_op(struct pt_regs *regs)
{
siginfo_t info;
__u8 opcode[6];
@@ -252,7 +258,7 @@ void __kprobes illegal_op(struct pt_regs *regs)
if (signal)
do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
}
-
+NOKPROBE_SYMBOL(illegal_op);
#ifdef CONFIG_MATHEMU
void specification_exception(struct pt_regs *regs)
@@ -469,7 +475,7 @@ void space_switch_exception(struct pt_regs *regs)
do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
}
-void __kprobes kernel_stack_overflow(struct pt_regs * regs)
+void kernel_stack_overflow(struct pt_regs *regs)
{
bust_spinlocks(1);
printk("Kernel stack overflow.\n");
@@ -477,6 +483,7 @@ void __kprobes kernel_stack_overflow(struct pt_regs * regs)
bust_spinlocks(0);
panic("Corrupt kernel stack, can't continue.");
}
+NOKPROBE_SYMBOL(kernel_stack_overflow);
void __init trap_init(void)
{
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 55aade49b6d1..6b049ee75a56 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -271,7 +271,7 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MEM_CLR_CMMA:
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
- page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+ s390_reset_cmma(kvm->arch.gmap->mm);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
ret = 0;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 72bb2dd8b9cd..f47cb0c6d906 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -156,21 +156,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
return 0;
}
-static void __skey_check_enable(struct kvm_vcpu *vcpu)
+static int __skey_check_enable(struct kvm_vcpu *vcpu)
{
+ int rc = 0;
if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
- return;
+ return rc;
- s390_enable_skey();
+ rc = s390_enable_skey();
trace_kvm_s390_skey_related_inst(vcpu);
vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ return rc;
}
static int handle_skey(struct kvm_vcpu *vcpu)
{
- __skey_check_enable(vcpu);
+ int rc = __skey_check_enable(vcpu);
+ if (rc)
+ return rc;
vcpu->stat.instruction_storage_key++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
@@ -683,7 +687,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
- __skey_check_enable(vcpu);
+ int rc = __skey_check_enable(vcpu);
+
+ if (rc)
+ return rc;
if (set_guest_storage_key(current->mm, useraddr,
vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a2b81d6ce8a5..811937bb90be 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -261,8 +261,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
return;
if (!printk_ratelimit())
return;
- printk(KERN_ALERT "User process fault: interruption code 0x%X ",
- regs->int_code);
+ printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d",
+ regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
printk(KERN_CONT "\n");
printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
@@ -548,7 +548,7 @@ out:
return fault;
}
-void __kprobes do_protection_exception(struct pt_regs *regs)
+void do_protection_exception(struct pt_regs *regs)
{
unsigned long trans_exc_code;
int fault;
@@ -574,8 +574,9 @@ void __kprobes do_protection_exception(struct pt_regs *regs)
if (unlikely(fault))
do_fault_error(regs, fault);
}
+NOKPROBE_SYMBOL(do_protection_exception);
-void __kprobes do_dat_exception(struct pt_regs *regs)
+void do_dat_exception(struct pt_regs *regs)
{
int access, fault;
@@ -584,6 +585,7 @@ void __kprobes do_dat_exception(struct pt_regs *regs)
if (unlikely(fault))
do_fault_error(regs, fault);
}
+NOKPROBE_SYMBOL(do_dat_exception);
#ifdef CONFIG_PFAULT
/*
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1b79ca67392f..71c7eff2c89f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -18,6 +18,8 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -750,8 +752,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
break;
/* Walk the process page table, lock and get pte pointer */
ptep = get_locked_pte(gmap->mm, addr, &ptl);
- if (unlikely(!ptep))
- continue;
+ VM_BUG_ON(!ptep);
/* Set notification bit in the pgste of the pte */
entry = *ptep;
if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
@@ -761,7 +762,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
gaddr += PAGE_SIZE;
len -= PAGE_SIZE;
}
- spin_unlock(ptl);
+ pte_unmap_unlock(ptep, ptl);
}
up_read(&gmap->mm->mmap_sem);
return rc;
@@ -834,99 +835,6 @@ static inline void page_table_free_pgste(unsigned long *table)
__free_page(page);
}
-static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end, bool init_skey)
-{
- pte_t *start_pte, *pte;
- spinlock_t *ptl;
- pgste_t pgste;
-
- start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
- pte = start_pte;
- do {
- pgste = pgste_get_lock(pte);
- pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
- if (init_skey) {
- unsigned long address;
-
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
- PGSTE_GR_BIT | PGSTE_GC_BIT);
-
- /* skip invalid and not writable pages */
- if (pte_val(*pte) & _PAGE_INVALID ||
- !(pte_val(*pte) & _PAGE_WRITE)) {
- pgste_set_unlock(pte, pgste);
- continue;
- }
-
- address = pte_val(*pte) & PAGE_MASK;
- page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
- }
- pgste_set_unlock(pte, pgste);
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(start_pte, ptl);
-
- return addr;
-}
-
-static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end, bool init_skey)
-{
- unsigned long next;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
- } while (pmd++, addr = next, addr != end);
-
- return addr;
-}
-
-static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end, bool init_skey)
-{
- unsigned long next;
- pud_t *pud;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
- } while (pud++, addr = next, addr != end);
-
- return addr;
-}
-
-void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
- unsigned long end, bool init_skey)
-{
- unsigned long addr, next;
- pgd_t *pgd;
-
- down_write(&mm->mmap_sem);
- if (init_skey && mm_use_skey(mm))
- goto out_up;
- addr = start;
- pgd = pgd_offset(mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
- } while (pgd++, addr = next, addr != end);
- if (init_skey)
- current->mm->context.use_skey = 1;
-out_up:
- up_write(&mm->mmap_sem);
-}
-EXPORT_SYMBOL(page_table_reset_pgste);
-
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq)
{
@@ -992,11 +900,6 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
return NULL;
}
-void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
- unsigned long end, bool init_skey)
-{
-}
-
static inline void page_table_free_pgste(unsigned long *table)
{
}
@@ -1347,13 +1250,89 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
*/
-void s390_enable_skey(void)
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
- page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
+ unsigned long ptev;
+ pgste_t pgste;
+
+ pgste = pgste_get_lock(pte);
+ /*
+ * Remove all zero page mappings,
+ * after establishing a policy to forbid zero page mappings
+ * following faults for that page will get fresh anonymous pages
+ */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ ptep_flush_direct(walk->mm, addr, pte);
+ pte_val(*pte) = _PAGE_INVALID;
+ }
+ /* Clear storage key */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+ PGSTE_GR_BIT | PGSTE_GC_BIT);
+ ptev = pte_val(*pte);
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+ pgste_set_unlock(pte, pgste);
+ return 0;
+}
+
+int s390_enable_skey(void)
+{
+ struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc = 0;
+
+ down_write(&mm->mmap_sem);
+ if (mm_use_skey(mm))
+ goto out_up;
+
+ mm->context.use_skey = 1;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags)) {
+ mm->context.use_skey = 0;
+ rc = -ENOMEM;
+ goto out_up;
+ }
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pgste_t pgste;
+
+ pgste = pgste_get_lock(pte);
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+ pgste_set_unlock(pte, pgste);
+ return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+ down_write(&mm->mmap_sem);
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+ up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
* Test and reset if a guest page is dirty
*/
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index a9e1dc4ae442..805d8b29193a 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,4 +3,4 @@
#
obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
- pci_event.o pci_debug.o pci_insn.o
+ pci_event.o pci_debug.o pci_insn.o pci_mmio.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2fa7b14b9c08..ed3725e2d16e 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -369,8 +369,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
- msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
- msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
/* Allocate adapter summary indicator bit */
rc = -EIO;
@@ -474,7 +473,8 @@ static void zpci_map_resources(struct zpci_dev *zdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
+ pdev->resource[i].start =
+ (resource_size_t __force) pci_iomap(pdev, i, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
}
@@ -489,7 +489,8 @@ static void zpci_unmap_resources(struct zpci_dev *zdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pci_iounmap(pdev, (void *) pdev->resource[i].start);
+ pci_iounmap(pdev, (void __iomem __force *)
+ pdev->resource[i].start);
}
}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 6e22a247de9b..d6e411ed8b1f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -62,6 +62,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->tlb_refresh = response->refresh;
zdev->dma_mask = response->dasm;
zdev->msi_addr = response->msia;
+ zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
switch (response->version) {
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
new file mode 100644
index 000000000000..62c5ea6d8682
--- /dev/null
+++ b/arch/s390/pci/pci_mmio.c
@@ -0,0 +1,115 @@
+/*
+ * Access to PCI I/O memory from user space programs.
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+
+static long get_pfn(unsigned long user_addr, unsigned long access,
+ unsigned long *pfn)
+{
+ struct vm_area_struct *vma;
+ long ret;
+
+ down_read(&current->mm->mmap_sem);
+ ret = -EINVAL;
+ vma = find_vma(current->mm, user_addr);
+ if (!vma)
+ goto out;
+ ret = -EACCES;
+ if (!(vma->vm_flags & access))
+ goto out;
+ ret = follow_pfn(vma, user_addr, pfn);
+out:
+ up_read(&current->mm->mmap_sem);
+ return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
+ const void __user *, user_buffer, size_t, length)
+{
+ u8 local_buf[64];
+ void __iomem *io_addr;
+ void *buf;
+ unsigned long pfn;
+ long ret;
+
+ if (!zpci_is_enabled())
+ return -ENODEV;
+
+ if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+ return -EINVAL;
+ if (length > 64) {
+ buf = kmalloc(length, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ } else
+ buf = local_buf;
+
+ ret = get_pfn(mmio_addr, VM_WRITE, &pfn);
+ if (ret)
+ goto out;
+ io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+
+ ret = -EFAULT;
+ if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+ goto out;
+
+ if (copy_from_user(buf, user_buffer, length))
+ goto out;
+
+ memcpy_toio(io_addr, buf, length);
+ ret = 0;
+out:
+ if (buf != local_buf)
+ kfree(buf);
+ return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
+ void __user *, user_buffer, size_t, length)
+{
+ u8 local_buf[64];
+ void __iomem *io_addr;
+ void *buf;
+ unsigned long pfn;
+ long ret;
+
+ if (!zpci_is_enabled())
+ return -ENODEV;
+
+ if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+ return -EINVAL;
+ if (length > 64) {
+ buf = kmalloc(length, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ } else
+ buf = local_buf;
+
+ ret = get_pfn(mmio_addr, VM_READ, &pfn);
+ if (ret)
+ goto out;
+ io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+
+ ret = -EFAULT;
+ if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+ goto out;
+
+ memcpy_fromio(buf, io_addr, length);
+
+ if (copy_to_user(user_buffer, buf, length))
+ goto out;
+
+ ret = 0;
+out:
+ if (buf != local_buf)
+ kfree(buf);
+ return ret;
+}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 752e30d63904..177d5973b132 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -103,6 +103,17 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp,
+ int full)
+{
+ return pmdp_get_and_clear(mm, address, pmdp);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
+
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long address, pte_t *ptep,
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index f7296e57d614..5297f9fa0ef2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -335,6 +335,7 @@ extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
#endif
+int arch_check_ftrace_location(struct kprobe *p);
/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b46461116cd2..b922a16c9b5b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -56,6 +56,17 @@ extern int sysctl_legacy_va_layout;
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif
+/*
+ * To prevent common memory management code establishing
+ * a zero page mapping on a read fault.
+ * This macro should be defined within <asm/pgtable.h>.
+ * s390 does this to prevent multiplexing of hardware bits
+ * related to the physical page in case of virtualization.
+ */
+#ifndef mm_forbids_zeropage
+#define mm_forbids_zeropage(X) (0)
+#endif
+
extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3995f546d0f3..317eb8ad28dd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1410,16 +1410,10 @@ static inline int check_kprobe_rereg(struct kprobe *p)
return ret;
}
-static int check_kprobe_address_safe(struct kprobe *p,
- struct module **probed_mod)
+int __weak arch_check_ftrace_location(struct kprobe *p)
{
- int ret = 0;
unsigned long ftrace_addr;
- /*
- * If the address is located on a ftrace nop, set the
- * breakpoint to the following instruction.
- */
ftrace_addr = ftrace_location((unsigned long)p->addr);
if (ftrace_addr) {
#ifdef CONFIG_KPROBES_ON_FTRACE
@@ -1431,7 +1425,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
return -EINVAL;
#endif
}
+ return 0;
+}
+static int check_kprobe_address_safe(struct kprobe *p,
+ struct module **probed_mod)
+{
+ int ret;
+
+ ret = arch_check_ftrace_location(p);
+ if (ret)
+ return ret;
jump_label_lock();
preempt_disable();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 02aa4185b17e..61eea02b53f5 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -169,6 +169,8 @@ cond_syscall(ppc_rtas);
cond_syscall(sys_spu_run);
cond_syscall(sys_spu_create);
cond_syscall(sys_subpage_prot);
+cond_syscall(sys_s390_pci_mmio_read);
+cond_syscall(sys_s390_pci_mmio_write);
/* mmu depending weak syscall entries */
cond_syscall(sys_mprotect);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de984159cf0b..46a1e4dbba81 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -805,7 +805,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
return VM_FAULT_OOM;
- if (!(flags & FAULT_FLAG_WRITE) &&
+ if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm) &&
transparent_hugepage_use_zero_page()) {
spinlock_t *ptl;
pgtable_t pgtable;
@@ -1400,7 +1400,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
* pgtable_trans_huge_withdraw after finishing pmdp related
* operations.
*/
- orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
+ orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd,
+ tlb->fullmm);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
if (is_huge_zero_pmd(orig_pmd)) {
diff --git a/mm/memory.c b/mm/memory.c
index 8b1c1d2e7c67..3cfea6b8d26b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2627,7 +2627,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_SIGBUS;
/* Use the zero-page for reads */
- if (!(flags & FAULT_FLAG_WRITE)) {
+ if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
vma->vm_page_prot));
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 001facfa5b74..3d1984e59a30 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -404,7 +404,7 @@ do_file(char const *const fname)
}
if (w2(ghdr->e_machine) == EM_S390) {
reltype = R_390_64;
- mcount_adjust_64 = -8;
+ mcount_adjust_64 = -14;
}
if (w2(ghdr->e_machine) == EM_MIPS) {
reltype = R_MIPS_64;
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d4b665610d67..56ea99a12ab7 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -243,7 +243,7 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "s390" && $bits == 64) {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
- $mcount_adjust = -8;
+ $mcount_adjust = -14;
$alignment = 8;
$type = ".quad";
$ld .= " -m elf64_s390";