From d04fa5a3ba06c3b7a1c4a6860d0fa4825507a755 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jan 2011 15:30:09 +0100 Subject: locking: Remove deprecated lock initializers Last users are gone. Remove the left overs. Signed-off-by: Thomas Gleixner --- Documentation/spinlocks.txt | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt index 178c831b907d..2e3c64b1a6a5 100644 --- a/Documentation/spinlocks.txt +++ b/Documentation/spinlocks.txt @@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock. The routines look the same as above: - rwlock_t xxx_lock = RW_LOCK_UNLOCKED; + rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock); unsigned long flags; @@ -196,25 +196,3 @@ appropriate: For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or __SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate. - -SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere -with lockdep state tracking. - -Most of the time, you can simply turn: - static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED; -into: - static DEFINE_SPINLOCK(xxx_lock); - -Static structure member variables go from: - - struct foo bar { - .lock = SPIN_LOCK_UNLOCKED; - }; - -to: - - struct foo bar { - .lock = __SPIN_LOCK_UNLOCKED(bar.lock); - }; - -Declaration of static rw_locks undergo a similar transformation. -- cgit v1.2.3 From 1ff511e35ed87cc2ebade9e678e4a2fe39b6f9c5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 4 Feb 2011 21:52:05 +0900 Subject: tracing/kprobes: Add bitfield type Add bitfield type for tracing arguments on kprobe-tracer. The syntax of a bitfield type is: b@/ e.g. Accessing 2 bits-width field with 4 bits-offset in 32 bits-width data at 4 bytes offseted from the address pointed by AX register: +4(%ax):b2@4/32 Since the width of container data depends on the arch, so I just added the container-size at the end. Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110204125205.9507.11363.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/trace/kprobetrace.txt | 16 +++++- kernel/trace/trace_kprobe.c | 104 +++++++++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 5f77d94598dd..6d27ab8d6e9f 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -42,11 +42,25 @@ Synopsis of kprobe_events +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types - (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported. + (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield + are supported. (*) only for return probe. (**) this is useful for fetching a field of data structures. +Types +----- +Several types are supported for fetch-args. Kprobe tracer will access memory +by given type. Prefix 's' and 'u' means those types are signed and unsigned +respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). +String type is a special type, which fetches a "null-terminated" string from +kernel space. This means it will fail and store NULL if the string container +has been paged out. +Bitfield is another special type, which takes 3 parameters, bit-width, bit- +offset, and container-size (usually 32). The syntax is; + + b@/ + Per-Probe Event Filtering ------------------------- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c6ed88660856..ccdc542022c3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -353,6 +353,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) kfree(data); } +/* Bitfield fetch function */ +struct bitfield_fetch_param { + struct fetch_param orig; + unsigned char hi_shift; + unsigned char low_shift; +}; + +#define DEFINE_FETCH_bitfield(type) \ +static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct bitfield_fetch_param *bprm = data; \ + type buf = 0; \ + call_fetch(&bprm->orig, regs, &buf); \ + if (buf) { \ + buf <<= bprm->hi_shift; \ + buf >>= bprm->low_shift; \ + } \ + *(type *)dest = buf; \ +} +DEFINE_BASIC_FETCH_FUNCS(bitfield) +#define fetch_bitfield_string NULL +#define fetch_bitfield_string_size NULL + +static __kprobes void +free_bitfield_fetch_param(struct bitfield_fetch_param *data) +{ + /* + * Don't check the bitfield itself, because this must be the + * last fetch function. + */ + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + free_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) + free_symbol_cache(data->orig.data); + kfree(data); +} /* Default (unsigned long) fetch type */ #define __DEFAULT_FETCH_TYPE(t) u##t #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) @@ -367,6 +404,7 @@ enum { FETCH_MTD_memory, FETCH_MTD_symbol, FETCH_MTD_deref, + FETCH_MTD_bitfield, FETCH_MTD_END, }; @@ -387,6 +425,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \ ASSIGN_FETCH_FUNC(memory, ftype), \ ASSIGN_FETCH_FUNC(symbol, ftype), \ ASSIGN_FETCH_FUNC(deref, ftype), \ +ASSIGN_FETCH_FUNC(bitfield, ftype), \ } \ } @@ -430,9 +469,33 @@ static const struct fetch_type *find_fetch_type(const char *type) if (!type) type = DEFAULT_FETCH_TYPE_STR; + /* Special case: bitfield */ + if (*type == 'b') { + unsigned long bs; + type = strchr(type, '/'); + if (!type) + goto fail; + type++; + if (strict_strtoul(type, 0, &bs)) + goto fail; + switch (bs) { + case 8: + return find_fetch_type("u8"); + case 16: + return find_fetch_type("u16"); + case 32: + return find_fetch_type("u32"); + case 64: + return find_fetch_type("u64"); + default: + goto fail; + } + } + for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) if (strcmp(type, fetch_type_table[i].name) == 0) return &fetch_type_table[i]; +fail: return NULL; } @@ -586,7 +649,9 @@ error: static void free_probe_arg(struct probe_arg *arg) { - if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) + if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) + free_bitfield_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) free_deref_fetch_param(arg->fetch.data); else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) free_symbol_cache(arg->fetch.data); @@ -806,6 +871,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t, return ret; } +#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) + +/* Bitfield type needs to be parsed into a fetch function */ +static int __parse_bitfield_probe_arg(const char *bf, + const struct fetch_type *t, + struct fetch_param *f) +{ + struct bitfield_fetch_param *bprm; + unsigned long bw, bo; + char *tail; + + if (*bf != 'b') + return 0; + + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) + return -ENOMEM; + bprm->orig = *f; + f->fn = t->fetch[FETCH_MTD_bitfield]; + f->data = (void *)bprm; + + bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ + if (bw == 0 || *tail != '@') + return -EINVAL; + + bf = tail + 1; + bo = simple_strtoul(bf, &tail, 0); + if (tail == bf || *tail != '/') + return -EINVAL; + + bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); + bprm->low_shift = bprm->hi_shift + bo; + return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; +} + /* String length checking wrapper */ static int parse_probe_arg(char *arg, struct trace_probe *tp, struct probe_arg *parg, int is_return) @@ -835,6 +935,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp, parg->offset = tp->size; tp->size += parg->type->size; ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); + if (ret >= 0) + ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); if (ret >= 0) { parg->fetch_size.fn = get_fetch_size_function(parg->type, parg->fetch.fn); -- cgit v1.2.3 From c763ba06bd9b5db2c46c36276c89103d92d2c604 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:22 +1100 Subject: tracing/syscalls: Make arch_syscall_addr weak Some architectures use non-trivial system call tables and will not work with the generic arch_syscall_addr code. For example, PowerPC64 uses a table of twin long longs. This patch makes the generic arch_syscall_addr weak to allow architectures with non-trivial system call tables to override it. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-4-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 3 +++ kernel/trace/trace_syscalls.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index dc52bd442c92..6fca17beee2f 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -247,6 +247,9 @@ You need very few things to get the syscalls tracing in an arch. - Support the TIF_SYSCALL_TRACEPOINT thread flags. - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace in the ptrace syscalls tracing path. +- If the system call table on this arch is more complicated than a simple array + of addresses of the system calls, implement an arch_syscall_addr to return + the address of a given system call. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 423094288fb5..af831545f656 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -446,7 +446,7 @@ int init_syscall_trace(struct ftrace_event_call *call) return id; } -unsigned long __init arch_syscall_addr(int nr) +unsigned long __init __weak arch_syscall_addr(int nr) { return (unsigned long)sys_call_table[nr]; } -- cgit v1.2.3 From b2d55496818d64310b9f5486d4eea76ea614d7f8 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:23 +1100 Subject: tracing/syscalls: Allow arch specific syscall symbol matching Some architectures have unusual symbol names and the generic code to match the symbol name with the function name for the syscall metadata will fail. For example, symbols on PPC64 start with a period and the generic code will fail to match them. This patch moves the match logic out into a separate function which an arch can override by defining ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and implementing arch_syscall_match_sym_name. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-5-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 4 ++++ kernel/trace/trace_syscalls.c | 21 ++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 6fca17beee2f..79fcafc7fd64 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -250,6 +250,10 @@ You need very few things to get the syscalls tracing in an arch. - If the system call table on this arch is more complicated than a simple array of addresses of the system calls, implement an arch_syscall_addr to return the address of a given system call. +- If the symbol names of the system calls do not match the function names on + this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and + implement arch_syscall_match_sym_name with the appropriate logic to return + true if the function name corresponds with the symbol name. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index af831545f656..86a23e7de031 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -60,6 +60,19 @@ extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; +#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + return !strcmp(sym + 3, name + 3); +} +#endif + static __init struct syscall_metadata * find_syscall_meta(unsigned long syscall) { @@ -73,13 +86,7 @@ find_syscall_meta(unsigned long syscall) kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { - /* - * Only compare after the "sys" prefix. Archs that use - * syscall wrappers may have syscalls symbols aliases prefixed - * with "SyS" instead of "sys", leading to an unwanted - * mismatch. - */ - if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) return *start; } return NULL; -- cgit v1.2.3 From 87d80de2800d087ea833cb79bc13f85ff34ed49f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Feb 2011 13:19:49 -0500 Subject: tracing: Remove obsolete sched_switch tracer The trace events sched_switch and sched_wakeup do the same thing as the stand alone sched_switch tracer does. It is no longer needed. Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 110 -------------------------------------- kernel/trace/trace_sched_switch.c | 48 ----------------- 2 files changed, 158 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 557c1edeccaf..65eddb7cfa02 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -202,10 +202,6 @@ Here is the list of current tracers that may be configured. to draw a graph of function calls similar to C code source. - "sched_switch" - - Traces the context switches and wakeups between tasks. - "irqsoff" Traces the areas that disable interrupts and saves @@ -273,39 +269,6 @@ format, the function name that was traced "path_put" and the parent function that called this function "path_walk". The timestamp is the time at which the function was entered. -The sched_switch tracer also includes tracing of task wakeups -and context switches. - - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S - ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R - events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R - kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R - ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R - -Wake ups are represented by a "+" and the context switches are -shown as "==>". The format is: - - Context switches: - - Previous task Next Task - - :: ==> :: - - Wake ups: - - Current task Task waking up - - :: + :: - -The prio is the internal kernel priority, which is the inverse -of the priority that is usually displayed by user-space tools. -Zero represents the highest priority (99). Prio 100 starts the -"nice" priorities with 100 being equal to nice -20 and 139 being -nice 19. The prio "140" is reserved for the idle task which is -the lowest priority thread (pid 0). - - Latency trace format -------------------- @@ -491,79 +454,6 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] latencies, as described in "Latency trace format". -sched_switch ------------- - -This tracer simply records schedule switches. Here is an example -of how to use it. - - # echo sched_switch > current_tracer - # echo 1 > tracing_enabled - # sleep 1 - # echo 0 > tracing_enabled - # cat trace - -# tracer: sched_switch -# -# TASK-PID CPU# TIMESTAMP FUNCTION -# | | | | | - bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R - bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R - sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R - bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S - bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R - sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R - bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D - bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R - -0 [00] 240.132589: 0:140:R + 4:115:S - -0 [00] 240.132591: 0:140:R ==> 4:115:R - ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R - -0 [00] 240.132598: 0:140:R + 4:115:S - -0 [00] 240.132599: 0:140:R ==> 4:115:R - ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R - sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R - [...] - - -As we have discussed previously about this format, the header -shows the name of the trace and points to the options. The -"FUNCTION" is a misnomer since here it represents the wake ups -and context switches. - -The sched_switch file only lists the wake ups (represented with -'+') and context switches ('==>') with the previous task or -current task first followed by the next task or task waking up. -The format for both of these is PID:KERNEL-PRIO:TASK-STATE. -Remember that the KERNEL-PRIO is the inverse of the actual -priority with zero (0) being the highest priority and the nice -values starting at 100 (nice -20). Below is a quick chart to map -the kernel priority to user land priorities. - - Kernel Space User Space - =============================================================== - 0(high) to 98(low) user RT priority 99(high) to 1(low) - with SCHED_RR or SCHED_FIFO - --------------------------------------------------------------- - 99 sched_priority is not used in scheduling - decisions(it must be specified as 0) - --------------------------------------------------------------- - 100(high) to 139(low) user nice -20(high) to 19(low) - --------------------------------------------------------------- - 140 idle task priority - --------------------------------------------------------------- - -The task states are: - - R - running : wants to run, may not actually be running - S - sleep : process is waiting to be woken up (handles signals) - D - disk sleep (uninterruptible sleep) : process must be woken up - (ignores signals) - T - stopped : process suspended - t - traced : process is being traced (with something like gdb) - Z - zombie : process waiting to be cleaned up - X - unknown - - ftrace_enabled -------------- diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8f758d070c43..7e62c0a18456 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr) ctx_trace = tr; } -static void stop_sched_trace(struct trace_array *tr) -{ - tracing_stop_sched_switch_record(); -} - -static int sched_switch_trace_init(struct trace_array *tr) -{ - ctx_trace = tr; - tracing_reset_online_cpus(tr); - tracing_start_sched_switch_record(); - return 0; -} - -static void sched_switch_trace_reset(struct trace_array *tr) -{ - if (sched_ref) - stop_sched_trace(tr); -} - -static void sched_switch_trace_start(struct trace_array *tr) -{ - sched_stopped = 0; -} - -static void sched_switch_trace_stop(struct trace_array *tr) -{ - sched_stopped = 1; -} - -static struct tracer sched_switch_trace __read_mostly = -{ - .name = "sched_switch", - .init = sched_switch_trace_init, - .reset = sched_switch_trace_reset, - .start = sched_switch_trace_start, - .stop = sched_switch_trace_stop, - .wait_pipe = poll_wait_pipe, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_sched_switch, -#endif -}; - -__init static int init_sched_switch_trace(void) -{ - return register_tracer(&sched_switch_trace); -} -device_initcall(init_sched_switch_trace); - -- cgit v1.2.3 From 6752ab4a9c30d5411b2dfdb251a3f1cb18aae487 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Feb 2011 13:54:06 -0500 Subject: tracing: Deprecate tracing_enabled for tracing_on tracing_enabled should not be used, it is heavy weight and does not do much in helping lower the overhead. tracing_on should be used instead. Warn users to use tracing_on when tracing_enabled is used as it will soon be removed from the tracing directory. Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 38 +++++++++++++++++++------------------- kernel/trace/trace.c | 4 ++++ 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 65eddb7cfa02..67f1cc473257 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files: tracers listed here can be configured by echoing their name into current_tracer. - tracing_enabled: + tracing_on: - This sets or displays whether the current_tracer - is activated and tracing or not. Echo 0 into this - file to disable the tracer or 1 to enable it. + This sets or displays whether writing to the trace + ring buffer is enabled. Echo 0 into this file to disable + the tracer or 1 to enable it. trace: @@ -497,10 +497,10 @@ an example: # echo irqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: irqsoff # @@ -605,10 +605,10 @@ is much like the irqsoff tracer. # echo preemptoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: preemptoff # @@ -753,10 +753,10 @@ tracers. # echo preemptirqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: preemptirqsoff # @@ -916,9 +916,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under # echo wakeup > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # chrt -f 5 sleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: wakeup # @@ -1030,9 +1030,9 @@ ftrace_enabled is set; otherwise this tracer is a nop. # sysctl kernel.ftrace_enabled=1 # echo function > current_tracer - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: function # @@ -1070,7 +1070,7 @@ int trace_fd; [...] int main(int argc, char *argv[]) { [...] - trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); + trace_fd = open(tracing_file("tracing_on"), O_WRONLY); [...] if (condition_hit()) { write(trace_fd, "0", 1); @@ -1521,9 +1521,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt: # echo sys_nanosleep hrtimer_interrupt \ > set_ftrace_filter # echo function > current_tracer - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: ftrace # @@ -1769,9 +1769,9 @@ different. The trace is live. # echo function > current_tracer # cat trace_pipe > /tmp/trace.out & [1] 4153 - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: function # diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dc53ecb80589..8dc8da6733f9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2710,6 +2710,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); if (tracer_enabled ^ val) { + + /* Only need to warn if this is used to change the state */ + WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on"); + if (val) { tracer_enabled = 1; if (current_trace->start) -- cgit v1.2.3 From da6b737b9ab768dd06bb4b0395131d10e524cf83 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 22 Feb 2011 21:07:37 +0100 Subject: x86: Add device tree support This patch adds minimal support for device tree on x86. The device tree blob is passed to the kernel via setup_data which requires at least boot protocol 2.09. Memory size, restricted memory regions, boot arguments are gathered the traditional way so things like cmd_line are just here to let the code compile. The current plan is use the device tree as an extension and to gather information which can not be enumerated and would have to be hardcoded otherwise. This includes things like - which devices are on this I2C/SPI bus? - how are the interrupts wired to IO APIC? - where could my hpet be? Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dirk Brandewie Acked-by: Grant Likely Cc: sodaville@linutronix.de Cc: devicetree-discuss@lists.ozlabs.org LKML-Reference: <1298405266-1624-3-git-send-email-bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner --- Documentation/devicetree/booting-without-of.txt | 20 ++++++++++ arch/x86/Kconfig | 2 + arch/x86/include/asm/bootparam.h | 1 + arch/x86/include/asm/irq.h | 3 -- arch/x86/include/asm/prom.h | 48 ++++++++++++++++++++++- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/devicetree.c | 51 +++++++++++++++++++++++++ arch/x86/kernel/irq.c | 9 ----- arch/x86/kernel/setup.c | 4 ++ 9 files changed, 126 insertions(+), 13 deletions(-) create mode 100644 arch/x86/kernel/devicetree.c (limited to 'Documentation') diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt index 28b1c9d3d351..55fd2623445b 100644 --- a/Documentation/devicetree/booting-without-of.txt +++ b/Documentation/devicetree/booting-without-of.txt @@ -13,6 +13,7 @@ Table of Contents I - Introduction 1) Entry point for arch/powerpc + 2) Entry point for arch/x86 II - The DT block format 1) Header @@ -225,6 +226,25 @@ it with special cases. cannot support both configurations with Book E and configurations with classic Powerpc architectures. +2) Entry point for arch/x86 +------------------------------- + + There is one single 32bit entry point to the kernel at code32_start, + the decompressor (the real mode entry point goes to the same 32bit + entry point once it switched into protected mode). That entry point + supports one calling convention which is documented in + Documentation/x86/boot.txt + The physical pointer to the device-tree block (defined in chapter II) + is passed via setup_data which requires at least boot protocol 2.09. + The type filed is defined as + + #define SETUP_DTB 2 + + This device-tree is used as an extension to the "boot page". As such it + does not parse / consider data which is already covered by the boot + page. This includes memory size, reserved ranges, command line arguments + or initrd address. It simply holds information which can not be retrieved + otherwise like interrupt routing or a list of devices behind an I2C bus. II - The DT block format ======================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index efbae0c7521f..b4c2e9c67623 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -382,6 +382,8 @@ config X86_INTEL_CE depends on X86_32 depends on X86_EXTENDED_PLATFORM select X86_REBOOTFIXUPS + select OF + select OF_EARLY_FLATTREE ---help--- Select for the Intel CE media processor (CE4100) SOC. This option compiles in support for the CE4100 SOC for settop diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index c8bfe63a06de..e020d88ec02d 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -12,6 +12,7 @@ /* setup data types */ #define SETUP_NONE 0 #define SETUP_E820_EXT 1 +#define SETUP_DTB 2 /* extensible setup data list node */ struct setup_data { diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index c704b38c57a2..ba870bb6dd8e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -10,9 +10,6 @@ #include #include -/* Even though we don't support this, supply it to appease OF */ -static inline void irq_dispose_mapping(unsigned int virq) { } - static inline int irq_canonicalize(int irq) { return ((irq == 2) ? 9 : irq); diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index b4ec95f07518..e46f2a2b57a1 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -1 +1,47 @@ -/* dummy prom.h; here to make linux/of.h's #includes happy */ +/* + * Definitions for Device tree / OpenFirmware handling on X86 + * + * based on arch/powerpc/include/asm/prom.h which is + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_X86_PROM_H +#define _ASM_X86_PROM_H +#ifndef __ASSEMBLY__ + +#include +#include + +#include +#include +#include + +#ifdef CONFIG_OF +extern void add_dtb(u64 data); +#else +static inline void add_dtb(u64 data) { } +#endif + +extern char cmd_line[COMMAND_LINE_SIZE]; + +#define pci_address_to_pio pci_address_to_pio +unsigned long pci_address_to_pio(phys_addr_t addr); + +/** + * irq_dispose_mapping - Unmap an interrupt + * @virq: linux virq number of the interrupt to unmap + * + * FIXME: We really should implement proper virq handling like power, + * but that's going to be major surgery. + */ +static inline void irq_dispose_mapping(unsigned int virq) { } + +#define HAVE_ARCH_DEVTREE_FIXUPS + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34244b2cd880..6ac5036adf42 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-$(CONFIG_OF) += devicetree.o ### # 64 bit specific files diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c new file mode 100644 index 000000000000..0b8f0daef933 --- /dev/null +++ b/arch/x86/kernel/devicetree.c @@ -0,0 +1,51 @@ +/* + * Architecture specific OF callbacks. + */ +#include +#include +#include +#include +#include +#include +#include + +char __initdata cmd_line[COMMAND_LINE_SIZE]; + +unsigned int irq_create_of_mapping(struct device_node *controller, + const u32 *intspec, unsigned int intsize) +{ + return intspec[0]; + +} +EXPORT_SYMBOL_GPL(irq_create_of_mapping); + +unsigned long pci_address_to_pio(phys_addr_t address) +{ + /* + * The ioport address can be directly used by inX / outX + */ + BUG_ON(address >= (1 << 16)); + return (unsigned long)address; +} +EXPORT_SYMBOL_GPL(pci_address_to_pio); + +void __init early_init_dt_scan_chosen_arch(unsigned long node) +{ + BUG(); +} + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + BUG(); +} + +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); +} + +void __init add_dtb(u64 data) +{ + initial_boot_params = phys_to_virt((u64) (u32) data + + offsetof(struct setup_data, data)); +} diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 387b6a0c9e81..753136003af1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -276,15 +276,6 @@ void smp_x86_platform_ipi(struct pt_regs *regs) EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); -#ifdef CONFIG_OF -unsigned int irq_create_of_mapping(struct device_node *controller, - const u32 *intspec, unsigned int intsize) -{ - return intspec[0]; -} -EXPORT_SYMBOL_GPL(irq_create_of_mapping); -#endif - #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9521483ce58c..33dcbce1ab0f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -113,6 +113,7 @@ #endif #include #include +#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -445,6 +446,9 @@ static void __init parse_setup_data(void) case SETUP_E820_EXT: parse_e820_ext(data); break; + case SETUP_DTB: + add_dtb(pa_data); + break; default: break; } -- cgit v1.2.3 From df2634f43f5106947f3735a0b61a6527a4b278cd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 22 Feb 2011 21:07:38 +0100 Subject: x86: dtb: Add a device tree for CE4100 History: v1..v2: - dropped device_type except for cpu & pci. I have the compatible string for pci so I can drop the device_type once it is possible - I lowercased all compatible types. I will need to resend some patches which have upper case intel - The cpu had the same compatible string as the soc node. So I added to the soc node -immr for internel memory mapped registers. - I added generic names for all parts. - I reworked the i2c bars matching the way you suggested. I added a compatible node for the PCI device which only the PCI ids in its compatible string. The bars (each represents a complete i2c controller) have a "intel,ce4100-i2c-controller" compatible node. It is not used by the driver. The driver is probed via PCI ids (by the pci subsystem not OF) and matches the bar address against the ressource in the child node. Once there is a hit the node is attached. - The SPI driver is also probed via pci. However I also attached a compatible property based on PCI ids v2..v3: - intel,ce4100-immr become intel,ce4100-cp. cp stands for core peripherals. The Atom data sheet talks here about ACPI devices. Since we don't have ACPI this does not apply here. - The interrupt map is gone. There are now plenty of device nodes. - The "unit address string" got fixed, it uses not DD,V format. v3..v4: - added descriptions for compatible nodes introduced here: - intel,ce4100-ioapic - intel,ce4100-lapic - intel,ce4100-hpet - intel,ce4100 - intel,ce4100-cp - intel,ce4100-pci - added a description about I2C controller magic. - Added gpio-controller and gpio-cells property to gpio devices. Those properties are not (yet) used. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dirk Brandewie Acked-by: Grant Likely Cc: sodaville@linutronix.de Cc: devicetree-discuss@lists.ozlabs.org LKML-Reference: <1298405266-1624-4-git-send-email-bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner --- .../devicetree/bindings/i2c/ce4100-i2c.txt | 93 +++++ Documentation/devicetree/bindings/x86/ce4100.txt | 38 ++ .../devicetree/bindings/x86/interrupt.txt | 29 ++ Documentation/devicetree/bindings/x86/timer.txt | 6 + arch/x86/platform/ce4100/falconfalls.dts | 428 +++++++++++++++++++++ 5 files changed, 594 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/ce4100-i2c.txt create mode 100644 Documentation/devicetree/bindings/x86/ce4100.txt create mode 100644 Documentation/devicetree/bindings/x86/interrupt.txt create mode 100644 Documentation/devicetree/bindings/x86/timer.txt create mode 100644 arch/x86/platform/ce4100/falconfalls.dts (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt new file mode 100644 index 000000000000..569b16248514 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt @@ -0,0 +1,93 @@ +CE4100 I2C +---------- + +CE4100 has one PCI device which is described as the I2C-Controller. This +PCI device has three PCI-bars, each bar contains a complete I2C +controller. So we have a total of three independent I2C-Controllers +which share only an interrupt line. +The driver is probed via the PCI-ID and is gathering the information of +attached devices from the devices tree. +Grant Likely recommended to use the ranges property to map the PCI-Bar +number to its physical address and to use this to find the child nodes +of the specific I2C controller. This were his exact words: + + Here's where the magic happens. Each entry in + ranges describes how the parent pci address space + (middle group of 3) is translated to the local + address space (first group of 2) and the size of + each range (last cell). In this particular case, + the first cell of the local address is chosen to be + 1:1 mapped to the BARs, and the second is the + offset from be base of the BAR (which would be + non-zero if you had 2 or more devices mapped off + the same BAR) + + ranges allows the address mapping to be described + in a way that the OS can interpret without + requiring custom device driver code. + +This is an example which is used on FalconFalls: +------------------------------------------------ + i2c-controller@b,2 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "pci8086,2e68.2", + "pci8086,2e68", + "pciclass,ff0000", + "pciclass,ff00"; + + reg = <0x15a00 0x0 0x0 0x0 0x0>; + interrupts = <16 1>; + + /* as described by Grant, the first number in the group of + * three is the bar number followed by the 64bit bar address + * followed by size of the mapping. The bar address + * requires also a valid translation in parents ranges + * property. + */ + ranges = <0 0 0x02000000 0 0xdffe0500 0x100 + 1 0 0x02000000 0 0xdffe0600 0x100 + 2 0 0x02000000 0 0xdffe0700 0x100>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + + /* The first number in the reg property is the + * number of the bar + */ + reg = <0 0 0x100>; + + /* This I2C controller has no devices */ + }; + + i2c@1 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <1 0 0x100>; + + /* This I2C controller has one gpio controller */ + gpio@26 { + #gpio-cells = <2>; + compatible = "ti,pcf8575"; + reg = <0x26>; + gpio-controller; + }; + }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <2 0 0x100>; + + gpio@26 { + #gpio-cells = <2>; + compatible = "ti,pcf8575"; + reg = <0x26>; + gpio-controller; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/x86/ce4100.txt b/Documentation/devicetree/bindings/x86/ce4100.txt new file mode 100644 index 000000000000..b49ae593a60b --- /dev/null +++ b/Documentation/devicetree/bindings/x86/ce4100.txt @@ -0,0 +1,38 @@ +CE4100 Device Tree Bindings +--------------------------- + +The CE4100 SoC uses for in core peripherals the following compatible +format: ,-. +Many of the "generic" devices like HPET or IO APIC have the ce4100 +name in their compatible property because they first appeared in this +SoC. + +The CPU node +------------ + cpu@0 { + device_type = "cpu"; + compatible = "intel,ce4100"; + reg = <0>; + lapic = <&lapic0>; + }; + +The reg property describes the CPU number. The lapic property points to +the local APIC timer. + +The SoC node +------------ + +This node describes the in-core peripherals. Required property: + compatible = "intel,ce4100-cp"; + +The PCI node +------------ +This node describes the PCI bus on the SoC. Its property should be + compatible = "intel,ce4100-pci", "pci"; + +If the OS is using the IO-APIC for interrupt routing then the reported +interrupt numbers for devices is no longer true. In order to obtain the +correct interrupt number, the child node which represents the device has +to contain the interrupt property. Besides the interrupt property it has +to contain at least the reg property containing the PCI bus address and +compatible property according to "PCI Bus Binding Revision 2.1". diff --git a/Documentation/devicetree/bindings/x86/interrupt.txt b/Documentation/devicetree/bindings/x86/interrupt.txt new file mode 100644 index 000000000000..8b0efb097e60 --- /dev/null +++ b/Documentation/devicetree/bindings/x86/interrupt.txt @@ -0,0 +1,29 @@ +Interrupt chips +--------------- + +* Intel I/O Advanced Programmable Interrupt Controller (IO APIC) + + Required properties: + -------------------- + compatible = "intel,ce4100-ioapic"; + #interrupt-cells = <2>; + + Device's interrupt property: + + interrupts =

; + + The first number (P) represents the interrupt pin which is wired to the + IO APIC. The second number (S) represents the sense of interrupt which + should be configured and can be one of: + 0 - Edge Rising + 1 - Level Low + 2 - Level High + 3 - Edge Falling + +* Local APIC + Required property: + + compatible = "intel,ce4100-lapic"; + + This node is currently unused by Linux as the address of the local APIC + read from a MSR. diff --git a/Documentation/devicetree/bindings/x86/timer.txt b/Documentation/devicetree/bindings/x86/timer.txt new file mode 100644 index 000000000000..c688af58e3bd --- /dev/null +++ b/Documentation/devicetree/bindings/x86/timer.txt @@ -0,0 +1,6 @@ +Timers +------ + +* High Precision Event Timer (HPET) + Required property: + compatible = "intel,ce4100-hpet"; diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts new file mode 100644 index 000000000000..dc701ea58546 --- /dev/null +++ b/arch/x86/platform/ce4100/falconfalls.dts @@ -0,0 +1,428 @@ +/* + * CE4100 on Falcon Falls + * + * (c) Copyright 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ +/dts-v1/; +/ { + model = "intel,falconfalls"; + compatible = "intel,falconfalls"; + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "intel,ce4100"; + reg = <0>; + lapic = <&lapic0>; + }; + }; + + soc@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "intel,ce4100-cp"; + ranges; + + ioapic1: interrupt-controller@fec00000 { + #interrupt-cells = <2>; + compatible = "intel,ce4100-ioapic"; + interrupt-controller; + reg = <0xfec00000 0x1000>; + }; + + timer@fed00000 { + compatible = "intel,ce4100-hpet"; + reg = <0xfed00000 0x200>; + }; + + lapic0: interrupt-controller@fee00000 { + compatible = "intel,ce4100-lapic"; + reg = <0xfee00000 0x1000>; + }; + + pci@3fc { + #address-cells = <3>; + #size-cells = <2>; + compatible = "intel,ce4100-pci", "pci"; + device_type = "pci"; + bus-range = <0 0>; + ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000 + 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000 + 0x0000000 0 0x0 0x0 0 0x100>; + + /* Secondary IO-APIC */ + ioapic2: interrupt-controller@0,1 { + #interrupt-cells = <2>; + compatible = "intel,ce4100-ioapic"; + interrupt-controller; + reg = <0x100 0x0 0x0 0x0 0x0>; + assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>; + }; + + pci@1,0 { + #address-cells = <3>; + #size-cells = <2>; + compatible = "intel,ce4100-pci", "pci"; + device_type = "pci"; + bus-range = <1 1>; + ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; + + interrupt-parent = <&ioapic2>; + + display@2,0 { + compatible = "pci8086,2e5b.2", + "pci8086,2e5b", + "pciclass038000", + "pciclass0380"; + + reg = <0x11000 0x0 0x0 0x0 0x0>; + interrupts = <0 1>; + }; + + multimedia@3,0 { + compatible = "pci8086,2e5c.2", + "pci8086,2e5c", + "pciclass048000", + "pciclass0480"; + + reg = <0x11800 0x0 0x0 0x0 0x0>; + interrupts = <2 1>; + }; + + multimedia@4,0 { + compatible = "pci8086,2e5d.2", + "pci8086,2e5d", + "pciclass048000", + "pciclass0480"; + + reg = <0x12000 0x0 0x0 0x0 0x0>; + interrupts = <4 1>; + }; + + multimedia@4,1 { + compatible = "pci8086,2e5e.2", + "pci8086,2e5e", + "pciclass048000", + "pciclass0480"; + + reg = <0x12100 0x0 0x0 0x0 0x0>; + interrupts = <5 1>; + }; + + sound@6,0 { + compatible = "pci8086,2e5f.2", + "pci8086,2e5f", + "pciclass040100", + "pciclass0401"; + + reg = <0x13000 0x0 0x0 0x0 0x0>; + interrupts = <6 1>; + }; + + sound@6,1 { + compatible = "pci8086,2e5f.2", + "pci8086,2e5f", + "pciclass040100", + "pciclass0401"; + + reg = <0x13100 0x0 0x0 0x0 0x0>; + interrupts = <7 1>; + }; + + sound@6,2 { + compatible = "pci8086,2e60.2", + "pci8086,2e60", + "pciclass040100", + "pciclass0401"; + + reg = <0x13200 0x0 0x0 0x0 0x0>; + interrupts = <8 1>; + }; + + display@8,0 { + compatible = "pci8086,2e61.2", + "pci8086,2e61", + "pciclass038000", + "pciclass0380"; + + reg = <0x14000 0x0 0x0 0x0 0x0>; + interrupts = <9 1>; + }; + + display@8,1 { + compatible = "pci8086,2e62.2", + "pci8086,2e62", + "pciclass038000", + "pciclass0380"; + + reg = <0x14100 0x0 0x0 0x0 0x0>; + interrupts = <10 1>; + }; + + multimedia@8,2 { + compatible = "pci8086,2e63.2", + "pci8086,2e63", + "pciclass048000", + "pciclass0480"; + + reg = <0x14200 0x0 0x0 0x0 0x0>; + interrupts = <11 1>; + }; + + entertainment-encryption@9,0 { + compatible = "pci8086,2e64.2", + "pci8086,2e64", + "pciclass101000", + "pciclass1010"; + + reg = <0x14800 0x0 0x0 0x0 0x0>; + interrupts = <12 1>; + }; + + localbus@a,0 { + compatible = "pci8086,2e65.2", + "pci8086,2e65", + "pciclassff0000", + "pciclassff00"; + + reg = <0x15000 0x0 0x0 0x0 0x0>; + }; + + serial@b,0 { + compatible = "pci8086,2e66.2", + "pci8086,2e66", + "pciclass070003", + "pciclass0700"; + + reg = <0x15800 0x0 0x0 0x0 0x0>; + interrupts = <14 1>; + }; + + gpio@b,1 { + compatible = "pci8086,2e67.2", + "pci8086,2e67", + "pciclassff0000", + "pciclassff00"; + + #gpio-cells = <2>; + reg = <0x15900 0x0 0x0 0x0 0x0>; + interrupts = <15 1>; + gpio-controller; + }; + + i2c-controller@b,2 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "pci8086,2e68.2", + "pci8086,2e68", + "pciclass,ff0000", + "pciclass,ff00"; + + reg = <0x15a00 0x0 0x0 0x0 0x0>; + interrupts = <16 1>; + ranges = <0 0 0x02000000 0 0xdffe0500 0x100 + 1 0 0x02000000 0 0xdffe0600 0x100 + 2 0 0x02000000 0 0xdffe0700 0x100>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <0 0 0x100>; + }; + + i2c@1 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <1 0 0x100>; + + gpio@26 { + #gpio-cells = <2>; + compatible = "ti,pcf8575"; + reg = <0x26>; + gpio-controller; + }; + }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "intel,ce4100-i2c-controller"; + reg = <2 0 0x100>; + + gpio@26 { + #gpio-cells = <2>; + compatible = "ti,pcf8575"; + reg = <0x26>; + gpio-controller; + }; + }; + }; + + smard-card@b,3 { + compatible = "pci8086,2e69.2", + "pci8086,2e69", + "pciclass070500", + "pciclass0705"; + + reg = <0x15b00 0x0 0x0 0x0 0x0>; + interrupts = <15 1>; + }; + + spi-controller@b,4 { + #address-cells = <1>; + #size-cells = <0>; + compatible = + "pci8086,2e6a.2", + "pci8086,2e6a", + "pciclass,ff0000", + "pciclass,ff00"; + + reg = <0x15c00 0x0 0x0 0x0 0x0>; + interrupts = <15 1>; + + dac@0 { + compatible = "ti,pcm1755"; + reg = <0>; + spi-max-frequency = <115200>; + }; + + dac@1 { + compatible = "ti,pcm1609a"; + reg = <1>; + spi-max-frequency = <115200>; + }; + + eeprom@2 { + compatible = "atmel,at93c46"; + reg = <2>; + spi-max-frequency = <115200>; + }; + }; + + multimedia@b,7 { + compatible = "pci8086,2e6d.2", + "pci8086,2e6d", + "pciclassff0000", + "pciclassff00"; + + reg = <0x15f00 0x0 0x0 0x0 0x0>; + }; + + ethernet@c,0 { + compatible = "pci8086,2e6e.2", + "pci8086,2e6e", + "pciclass020000", + "pciclass0200"; + + reg = <0x16000 0x0 0x0 0x0 0x0>; + interrupts = <21 1>; + }; + + clock@c,1 { + compatible = "pci8086,2e6f.2", + "pci8086,2e6f", + "pciclassff0000", + "pciclassff00"; + + reg = <0x16100 0x0 0x0 0x0 0x0>; + interrupts = <3 1>; + }; + + usb@d,0 { + compatible = "pci8086,2e70.2", + "pci8086,2e70", + "pciclass0c0320", + "pciclass0c03"; + + reg = <0x16800 0x0 0x0 0x0 0x0>; + interrupts = <22 3>; + }; + + usb@d,1 { + compatible = "pci8086,2e70.2", + "pci8086,2e70", + "pciclass0c0320", + "pciclass0c03"; + + reg = <0x16900 0x0 0x0 0x0 0x0>; + interrupts = <22 3>; + }; + + sata@e,0 { + compatible = "pci8086,2e71.0", + "pci8086,2e71", + "pciclass010601", + "pciclass0106"; + + reg = <0x17000 0x0 0x0 0x0 0x0>; + interrupts = <23 3>; + }; + + flash@f,0 { + compatible = "pci8086,701.1", + "pci8086,701", + "pciclass050100", + "pciclass0501"; + + reg = <0x17800 0x0 0x0 0x0 0x0>; + interrupts = <13 1>; + }; + + entertainment-encryption@10,0 { + compatible = "pci8086,702.1", + "pci8086,702", + "pciclass101000", + "pciclass1010"; + + reg = <0x18000 0x0 0x0 0x0 0x0>; + }; + + co-processor@11,0 { + compatible = "pci8086,703.1", + "pci8086,703", + "pciclass0b4000", + "pciclass0b40"; + + reg = <0x18800 0x0 0x0 0x0 0x0>; + interrupts = <1 1>; + }; + + multimedia@12,0 { + compatible = "pci8086,704.0", + "pci8086,704", + "pciclass048000", + "pciclass0480"; + + reg = <0x19000 0x0 0x0 0x0 0x0>; + }; + }; + + isa@1f,0 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "isa"; + ranges = <1 0 0 0 0 0x100>; + + rtc@70 { + compatible = "intel,ce4100-rtc", "motorola,mc146818"; + interrupts = <8 3>; + interrupt-parent = <&ioapic1>; + ctrl-reg = <2>; + freq-reg = <0x26>; + reg = <1 0x70 2>; + }; + }; + }; + }; +}; -- cgit v1.2.3 From 3bcbaf6e08d8d82cde781997bd2c56dda87049b5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 22 Feb 2011 21:07:46 +0100 Subject: rtc: cmos: Add OF bindings This allows to load the OF driver based informations from the device tree. Systems without BIOS may need to perform some initialization. PowerPC creates a PNP device from the OF information and performs this kind of initialization in their private PCI quirk. This looks more generic. This patch also avoids registering the platform RTC driver on X86 if we have a device tree blob. Otherwise we would setup the device based on the hardcoded information in arch/x86 rather than the device tree based one. [ tglx: Changed "int of_have_populated_dt()" to bool as recommended by Grant ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dirk Brandewie Acked-by: Grant Likely Cc: sodaville@linutronix.de Cc: devicetree-discuss@lists.ozlabs.org Cc: rtc-linux@googlegroups.com Cc: Alessandro Zummo LKML-Reference: <1298405266-1624-12-git-send-email-bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner --- Documentation/devicetree/bindings/rtc/rtc-cmos.txt | 28 ++++++++++++++ arch/x86/kernel/rtc.c | 3 ++ drivers/rtc/rtc-cmos.c | 45 ++++++++++++++++++++++ include/linux/of.h | 12 ++++++ 4 files changed, 88 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/rtc-cmos.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/rtc/rtc-cmos.txt b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt new file mode 100644 index 000000000000..7382989b3052 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt @@ -0,0 +1,28 @@ + Motorola mc146818 compatible RTC +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Required properties: + - compatible : "motorola,mc146818" + - reg : should contain registers location and length. + +Optional properties: + - interrupts : should contain interrupt. + - interrupt-parent : interrupt source phandle. + - ctrl-reg : Contains the initial value of the control register also + called "Register B". + - freq-reg : Contains the initial value of the frequency register also + called "Regsiter A". + +"Register A" and "B" are usually initialized by the firmware (BIOS for +instance). If this is not done, it can be performed by the driver. + +ISA Example: + + rtc@70 { + compatible = "motorola,mc146818"; + interrupts = <8 3>; + interrupt-parent = <&ioapic1>; + ctrl-reg = <2>; + freq-reg = <0x26>; + reg = <1 0x70 2>; + }; diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 6f39cab052d5..3f2ad2640d85 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -236,6 +237,8 @@ static __init int add_rtc_cmos(void) } } #endif + if (of_have_populated_dt()) + return 0; platform_device_register(&rtc_device); dev_info(&rtc_device.dev, diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index c7ff8df347e7..159b95e4b420 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ #include @@ -1123,6 +1125,47 @@ static struct pnp_driver cmos_pnp_driver = { #endif /* CONFIG_PNP */ +#ifdef CONFIG_OF +static const struct of_device_id of_cmos_match[] = { + { + .compatible = "motorola,mc146818", + }, + { }, +}; +MODULE_DEVICE_TABLE(of, of_cmos_match); + +static __init void cmos_of_init(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct rtc_time time; + int ret; + const __be32 *val; + + if (!node) + return; + + val = of_get_property(node, "ctrl-reg", NULL); + if (val) + CMOS_WRITE(be32_to_cpup(val), RTC_CONTROL); + + val = of_get_property(node, "freq-reg", NULL); + if (val) + CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT); + + get_rtc_time(&time); + ret = rtc_valid_tm(&time); + if (ret) { + struct rtc_time def_time = { + .tm_year = 1, + .tm_mday = 1, + }; + set_rtc_time(&def_time); + } +} +#else +static inline void cmos_of_init(struct platform_device *pdev) {} +#define of_cmos_match NULL +#endif /*----------------------------------------------------------------*/ /* Platform setup should have set up an RTC device, when PNP is @@ -1131,6 +1174,7 @@ static struct pnp_driver cmos_pnp_driver = { static int __init cmos_platform_probe(struct platform_device *pdev) { + cmos_of_init(pdev); cmos_wake_setup(&pdev->dev); return cmos_do_probe(&pdev->dev, platform_get_resource(pdev, IORESOURCE_IO, 0), @@ -1162,6 +1206,7 @@ static struct platform_driver cmos_platform_driver = { #ifdef CONFIG_PM .pm = &cmos_pm_ops, #endif + .of_match_table = of_cmos_match, } }; diff --git a/include/linux/of.h b/include/linux/of.h index d9dd664a6a9c..266db1d0baa9 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -70,6 +70,11 @@ extern struct device_node *allnodes; extern struct device_node *of_chosen; extern rwlock_t devtree_lock; +static inline bool of_have_populated_dt(void) +{ + return allnodes != NULL; +} + static inline bool of_node_is_root(const struct device_node *node) { return node && (node->parent == NULL); @@ -222,5 +227,12 @@ extern void of_attach_node(struct device_node *); extern void of_detach_node(struct device_node *); #endif +#else + +static inline bool of_have_populated_dt(void) +{ + return false; +} + #endif /* CONFIG_OF */ #endif /* _LINUX_OF_H */ -- cgit v1.2.3 From 8d32a307e4faa8b123dc8a9cd56d1a7525f69ad3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Feb 2011 23:52:23 +0000 Subject: genirq: Provide forced interrupt threading Add a commandline parameter "threadirqs" which forces all interrupts except those marked IRQF_NO_THREAD to run threaded. That's mostly a debug option to allow retrieving better debug data from crashing interrupt handlers. If "threadirqs" is not enabled on the kernel command line, then there is no impact in the interrupt hotpath. Architecture code needs to select CONFIG_IRQ_FORCED_THREADING after marking the interrupts which cant be threaded IRQF_NO_THREAD. All interrupts which have IRQF_TIMER set are implict marked IRQF_NO_THREAD. Also all PER_CPU interrupts are excluded. Forced threading hard interrupts also forces all soft interrupt handling into thread context. When enabled it might slow down things a bit, but for debugging problems in interrupt code it's a reasonable penalty as it does not immediately crash and burn the machine when an interrupt handler is buggy. Some test results on a Core2Duo machine: Cache cold run of: # time git grep irq_desc non-threaded threaded real 1m18.741s 1m19.061s user 0m1.874s 0m1.757s sys 0m5.843s 0m5.427s # iperf -c server non-threaded [ 3] 0.0-10.0 sec 1.09 GBytes 933 Mbits/sec [ 3] 0.0-10.0 sec 1.09 GBytes 934 Mbits/sec [ 3] 0.0-10.0 sec 1.09 GBytes 933 Mbits/sec threaded [ 3] 0.0-10.0 sec 1.09 GBytes 939 Mbits/sec [ 3] 0.0-10.0 sec 1.09 GBytes 934 Mbits/sec [ 3] 0.0-10.0 sec 1.09 GBytes 937 Mbits/sec Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <20110223234956.772668648@linutronix.de> --- Documentation/kernel-parameters.txt | 4 +++ include/linux/interrupt.h | 7 ++++ kernel/irq/Kconfig | 3 ++ kernel/irq/internals.h | 2 ++ kernel/irq/manage.c | 67 ++++++++++++++++++++++++++++++++++--- kernel/softirq.c | 16 +++++++-- 6 files changed, 93 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 89835a4766a6..cac6cf9a588c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2436,6 +2436,10 @@ and is between 256 and 4096 characters. It is defined in the file : poll all this frequency 0: no polling (default) + threadirqs [KNL] + Force threading of all interrupt handlers except those + marked explicitely IRQF_NO_THREAD. + topology= [S390] Format: {off | on} Specify if the kernel should make use of the cpu diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0fc3eb9397b4..f8a8af108e0c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -383,6 +383,13 @@ static inline int disable_irq_wake(unsigned int irq) } #endif /* CONFIG_GENERIC_HARDIRQS */ + +#ifdef CONFIG_IRQ_FORCED_THREADING +extern bool force_irqthreads; +#else +#define force_irqthreads (0) +#endif + #ifndef __ARCH_SET_SOFTIRQ_PENDING #define set_softirq_pending(x) (local_softirq_pending() = (x)) #define or_softirq_pending(x) (local_softirq_pending() |= (x)) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 355b8c7957f5..144db9dcfcde 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -38,6 +38,9 @@ config HARDIRQS_SW_RESEND config IRQ_PREFLOW_FASTEOI bool +config IRQ_FORCED_THREADING + bool + config SPARSE_IRQ bool "Support sparse irq numbering" depends on HAVE_SPARSE_IRQ diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 935bec4bfa87..6c6ec9a49027 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -27,12 +27,14 @@ extern int noirqdebug; * IRQTF_DIED - handler thread died * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity + * IRQTF_FORCED_THREAD - irq action is force threaded */ enum { IRQTF_RUNTHREAD, IRQTF_DIED, IRQTF_WARNED, IRQTF_AFFINITY, + IRQTF_FORCED_THREAD, }; /* diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 58c861367300..acd599a43bfb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,17 @@ #include "internals.h" +#ifdef CONFIG_IRQ_FORCED_THREADING +__read_mostly bool force_irqthreads; + +static int __init setup_forced_irqthreads(char *arg) +{ + force_irqthreads = true; + return 0; +} +early_param("threadirqs", setup_forced_irqthreads); +#endif + /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * @irq: interrupt number to wait for @@ -701,6 +712,32 @@ static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } #endif +/* + * Interrupts which are not explicitely requested as threaded + * interrupts rely on the implicit bh/preempt disable of the hard irq + * context. So we need to disable bh here to avoid deadlocks and other + * side effects. + */ +static void +irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +{ + local_bh_disable(); + action->thread_fn(action->irq, action->dev_id); + irq_finalize_oneshot(desc, action, false); + local_bh_enable(); +} + +/* + * Interrupts explicitely requested as threaded interupts want to be + * preemtible - many of them need to sleep and wait for slow busses to + * complete. + */ +static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action) +{ + action->thread_fn(action->irq, action->dev_id); + irq_finalize_oneshot(desc, action, false); +} + /* * Interrupt handler thread */ @@ -711,8 +748,15 @@ static int irq_thread(void *data) }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); + void (*handler_fn)(struct irq_desc *desc, struct irqaction *action); int wake; + if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) + handler_fn = irq_forced_thread_fn; + else + handler_fn = irq_thread_fn; + sched_setscheduler(current, SCHED_FIFO, ¶m); current->irqaction = action; @@ -736,10 +780,7 @@ static int irq_thread(void *data) raw_spin_unlock_irq(&desc->lock); } else { raw_spin_unlock_irq(&desc->lock); - - action->thread_fn(action->irq, action->dev_id); - - irq_finalize_oneshot(desc, action, false); + handler_fn(desc, action); } wake = atomic_dec_and_test(&desc->threads_active); @@ -789,6 +830,22 @@ void exit_irq_thread(void) set_bit(IRQTF_DIED, &tsk->irqaction->flags); } +static void irq_setup_forced_threading(struct irqaction *new) +{ + if (!force_irqthreads) + return; + if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) + return; + + new->flags |= IRQF_ONESHOT; + + if (!new->thread_fn) { + set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); + new->thread_fn = new->handler; + new->handler = irq_default_primary_handler; + } +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -838,6 +895,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * dummy function which warns when called. */ new->handler = irq_nested_primary_handler; + } else { + irq_setup_forced_threading(new); } /* diff --git a/kernel/softirq.c b/kernel/softirq.c index c0490464e92f..a33fb2911248 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -311,9 +311,21 @@ void irq_enter(void) } #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED -# define invoke_softirq() __do_softirq() +static inline void invoke_softirq(void) +{ + if (!force_irqthreads) + __do_softirq(); + else + wakeup_softirqd(); +} #else -# define invoke_softirq() do_softirq() +static inline void invoke_softirq(void) +{ + if (!force_irqthreads) + do_softirq(); + else + wakeup_softirqd(); +} #endif /* -- cgit v1.2.3