summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2013-04-17 19:25:05 -0400
committerLen Brown <len.brown@intel.com>2013-04-17 19:25:05 -0400
commit5c99726b5faaf1ba4ca80c39d319f419c227f803 (patch)
tree35f7a3c77fd04a2003967afea7fd947597d59695 /tools
parent86239ceb33b0d8480b0f0ca0eec08e7f7a807374 (diff)
parent231ebb3b15380466ef91fda0cf33150acc3e03a6 (diff)
Merge branch 'fspin' into next
Diffstat (limited to 'tools')
-rw-r--r--tools/lib/traceevent/Makefile2
-rw-r--r--tools/perf/Makefile8
-rw-r--r--tools/perf/bench/bench.h24
-rw-r--r--tools/perf/builtin-record.c6
-rw-r--r--tools/perf/util/hist.h5
-rw-r--r--tools/perf/util/strlist.c2
-rw-r--r--tools/power/fspin/Makefile21
-rw-r--r--tools/power/fspin/fspin.168
-rw-r--r--tools/power/fspin/fspin.c443
9 files changed, 572 insertions, 7 deletions
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index a20e32033431..0b0a90787db6 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -122,7 +122,7 @@ export Q VERBOSE
EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
-INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES)
+INCLUDES = -I. $(CONFIG_INCLUDES)
# Set compile option CFLAGS if not set elsewhere
CFLAGS ?= -g -Wall
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a2108ca1cc17..bb74c79cd16e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -95,7 +95,7 @@ ifeq ("$(origin DEBUG)", "command line")
PERF_DEBUG = $(DEBUG)
endif
ifndef PERF_DEBUG
- CFLAGS_OPTIMIZE = -O6 -D_FORTIFY_SOURCE=2
+ CFLAGS_OPTIMIZE = -O6
endif
ifdef PARSER_DEBUG
@@ -180,6 +180,12 @@ ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-W
CFLAGS := $(CFLAGS) -Wvolatile-register-var
endif
+ifndef PERF_DEBUG
+ ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y)
+ CFLAGS := $(CFLAGS) -D_FORTIFY_SOURCE=2
+ endif
+endif
+
### --- END CONFIGURATION SECTION ---
ifeq ($(srctree),)
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index a5223e6a7b43..0fdc85269c4d 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -1,6 +1,30 @@
#ifndef BENCH_H
#define BENCH_H
+/*
+ * The madvise transparent hugepage constants were added in glibc
+ * 2.13. For compatibility with older versions of glibc, define these
+ * tokens if they are not already defined.
+ *
+ * PA-RISC uses different madvise values from other architectures and
+ * needs to be special-cased.
+ */
+#ifdef __hppa__
+# ifndef MADV_HUGEPAGE
+# define MADV_HUGEPAGE 67
+# endif
+# ifndef MADV_NOHUGEPAGE
+# define MADV_NOHUGEPAGE 68
+# endif
+#else
+# ifndef MADV_HUGEPAGE
+# define MADV_HUGEPAGE 14
+# endif
+# ifndef MADV_NOHUGEPAGE
+# define MADV_NOHUGEPAGE 15
+# endif
+#endif
+
extern int bench_numa(int argc, const char **argv, const char *prefix);
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 774c90713a53..f1a939ebc19c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -573,13 +573,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
perf_event__synthesize_guest_os, tool);
}
- if (!opts->target.system_wide)
+ if (perf_target__has_task(&opts->target))
err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
process_synthesized_event,
machine);
- else
+ else if (perf_target__has_cpu(&opts->target))
err = perf_event__synthesize_threads(tool, process_synthesized_event,
machine);
+ else /* command specified */
+ err = 0;
if (err != 0)
goto out_delete_session;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 38624686ee9a..226a4ae2f936 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -208,8 +208,9 @@ static inline int script_browse(const char *script_opt __maybe_unused)
return 0;
}
-#define K_LEFT -1
-#define K_RIGHT -2
+#define K_LEFT -1000
+#define K_RIGHT -2000
+#define K_SWITCH_INPUT_DATA -3000
#endif
#ifdef GTK2_SUPPORT
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 55433aa42c8f..eabdce0a2daa 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -143,7 +143,7 @@ struct strlist *strlist__new(bool dupstr, const char *list)
slist->rblist.node_delete = strlist__node_delete;
slist->dupstr = dupstr;
- if (slist && strlist__parse_list(slist, list) != 0)
+ if (list && strlist__parse_list(slist, list) != 0)
goto out_error;
}
diff --git a/tools/power/fspin/Makefile b/tools/power/fspin/Makefile
new file mode 100644
index 000000000000..527400782943
--- /dev/null
+++ b/tools/power/fspin/Makefile
@@ -0,0 +1,21 @@
+CC = $(CROSS_COMPILE)gcc
+BUILD_OUTPUT := $(PWD)
+PREFIX := /usr
+DESTDIR :=
+
+fspin : fspin.c
+CFLAGS += -Wall
+
+%: %.c
+ @mkdir -p $(BUILD_OUTPUT)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ -lpthread
+
+.PHONY : clean
+clean :
+ @rm -f $(BUILD_OUTPUT)/fspin
+
+install : fspin
+ install -d $(DESTDIR)$(PREFIX)/bin
+ install $(BUILD_OUTPUT)/fspin $(DESTDIR)$(PREFIX)/bin/fspin
+ install -d $(DESTDIR)$(PREFIX)/share/man/man1
+ install fspin.1 $(DESTDIR)$(PREFIX)/share/man/man1
diff --git a/tools/power/fspin/fspin.1 b/tools/power/fspin/fspin.1
new file mode 100644
index 000000000000..b57308e03b82
--- /dev/null
+++ b/tools/power/fspin/fspin.1
@@ -0,0 +1,68 @@
+.\" This page Copyright (C) 2013 Len Brown <len.brown@intel.com>
+.\" Distributed under the GPL, Copyleft 1994.
+.TH FSPIN 8
+.SH NAME
+fspin \- simple workload for power experiments
+.SH SYNOPSIS
+.ft B
+.B fspin
+.RB [ "\-v" ]
+.RB [ "\-i iterations" ]
+.RB [ "\-s sec_per_iteration" ]
+.RB [ "\-t threads" ]
+.RB [ "\-b bin_to_cpus" ]
+.RB [ "\-m memory (b|k|m)" ]
+.br
+.SH DESCRIPTION
+\fBfspin\fP
+heats up the hardware by running a
+floating-point spin loop per processor.
+Every
+.I interval_sec
+fspin presents the sum of the work completed
+by all threads.
+.SS Options
+.PP
+\fB-v\fP increases verbosity.
+By default, fspin prints only the quantity work completed.
+.PP
+\fB-s sec_per_iteration\fP
+Print the indicator of work completed every
+sec_per_interval seconds. By default, 5 sec.
+.PP
+\fB-t threads\fP
+Create
+.I threads
+software threads. Default is number of
+logical processors available, or if '-b' option is used,
+one thread per bound processor.
+.PP
+\fB-b bind_to_cpus\fP
+Bind the threads to the indicated list of comma-separated CPU numbers.
+A range of CPUs can be specified by using '-'.
+.PP
+\fB-i iterations\fP
+Exit after
+.I iterations
+and print total of work completed.
+Default is to continue running forever, printing work per iteration/sec.
+.PP
+\fB-m memory\fP
+Allocate arrays of
+.I memory_size,
+which is followed by a modifier b|k|m, for bytes, kilobytes, or megabytes,
+respectively. Default is 512 bytes, which will spin in-cache.
+Increase this number to exercise larger caches and memory.
+
+.SH WHAT FSPIN IS NOT
+Fspin is just a simple tool,
+and has not be characterized as a
+.I performance benchmark.
+Fspin is not a
+.I power virus for cooling HW design,
+as there are better tools, specialized for that purpose.
+
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/fspin/fspin.c b/tools/power/fspin/fspin.c
new file mode 100644
index 000000000000..38288c1d61bb
--- /dev/null
+++ b/tools/power/fspin/fspin.c
@@ -0,0 +1,443 @@
+/*
+ * fspin.c - user utility to burn CPU cycles, thrash the cache and memory
+ *
+ * Copyright (c) 2013, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/*
+ * Creates one thread per logical processor (override with -t).
+ * Threads run on any processor (override with -b).
+ * Each thread allocates and initializes its own data.
+ * Then it processes the data using an infinite DAXPY loop:
+ * Double precision Y[i] = A*X[i] + Y[i]
+ *
+ * The parent thread wakes up every reporting interval,
+ * (override 5 sec default with -i),
+ * sums up and prints aggregate performance.
+ *
+ * The actual computation is somewhat arbitrary, if not random.
+ * The performance number is intended only to be compared to itself
+ * on the same machine, to illustrate how various power limiting
+ * techniques impact performance.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ctype.h>
+#include <time.h>
+#include <sched.h>
+#include <errno.h>
+#include <sys/time.h>
+
+#define BANNER "fspin v1.1, April 7, 2013 - Len Brown <len.brown@intel.com>"
+
+#define handle_error_en(en, msg) \
+ do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
+
+#define handle_error(msg) \
+ do { perror(msg); exit(EXIT_FAILURE); } while (0)
+
+struct thread_info { /* Used as argument to spin_loop() */
+ pthread_t thread_id; /* ID returned by pthread_create() */
+ int thread_num; /* Application-defined thread # */
+};
+
+struct padded {
+ double counter; /* 8 bytes */
+ double pad[(32 - 1)]; /* round up to 256 byte line */
+} *thread_data;
+
+int num_threads;
+int thread_num_override;
+int data_bytes = 512;
+int nrcpus = 64;
+int sec_per_interval = 5; /* seconds */
+int iterations;
+int verbose;
+int do_binding;
+
+cpu_set_t *cpu_affinity_set;
+size_t cpu_affinity_setsize;
+
+void
+allocate_cpusets()
+{
+ /*
+ * Allocate and initialize cpu_affinity_set
+ */
+ cpu_affinity_set = CPU_ALLOC(nrcpus);
+ if (cpu_affinity_set == NULL) {
+ perror("CPU_ALLOC");
+ exit(3);
+ }
+ cpu_affinity_setsize = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
+}
+
+void
+bind_to_cpus()
+{
+ if (!do_binding)
+ return;
+
+ if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) {
+ fprintf(stderr, "bind_to_cpus() failed\n");
+ perror("sched_setaffinity");
+ exit(-1);
+ }
+}
+
+int get_num_cpus()
+{
+ cpu_set_t *mask;
+ size_t size;
+ int num_cpus;
+
+realloc:
+ mask = CPU_ALLOC(nrcpus);
+ size = CPU_ALLOC_SIZE(nrcpus);
+ CPU_ZERO_S(size, mask);
+ if (sched_getaffinity(0, size, mask) == -1) {
+ CPU_FREE(mask);
+ if (errno == EINVAL &&
+ nrcpus < (1024 << 8)) {
+ nrcpus = nrcpus << 2;
+ goto realloc;
+ }
+ perror("sched_getaffinity");
+ return -1;
+ }
+
+ num_cpus = CPU_COUNT_S(size, mask);
+
+ CPU_FREE(mask);
+
+ return num_cpus;
+}
+
+static void *spin_loop(void *arg)
+{
+ struct thread_info *tinfo = (struct thread_info *)arg;
+ double *x, *y;
+ int i = 0;
+ int data_entries = data_bytes / sizeof(double);
+ unsigned long long bitmask = random();
+
+
+ x = malloc(data_bytes);
+ y = malloc(data_bytes);
+
+ if (x == NULL || y == NULL) {
+ perror("malloc");
+ exit(-1);
+ }
+
+ /*
+ * seed data array with random bits
+ */
+ for (i = 0; i < data_entries; ++i) {
+ x[i] = 1.0 + i * bitmask;
+ y[i] = 1.0 + i * bitmask;
+ }
+
+ for (i = 0; ; i++) {
+
+ double a = 3.1415926535 * i;
+
+ y[i] = a * x[i] + y[i]; /* DAXPY */
+
+ thread_data[tinfo->thread_num].counter++;
+
+ if (i >= data_entries)
+ i = 0;
+ }
+ /* not reached */
+}
+
+void usage()
+{
+ fprintf(stderr,
+ "Usage: fspin [-v][-s sec_per_iteration][-i iterations][-t num_threads][-b cpu_list][-m memory(b|k|m)]\n");
+ fprintf(stderr, "\twhere 'cpu_list' is comma and dash separated numbers with no spaces\n");
+ exit(EXIT_FAILURE);
+}
+
+void parse_error(char *string, char c)
+{
+ fprintf(stderr, "parse error on '%s' at '%c'\n", string, c);
+ usage();
+}
+
+int add_cpu_to_bind_mask(int cpu) {
+ static int num_added;
+
+ /* check if cpu is valid */
+ if (cpu < 0 || cpu > nrcpus) {
+ fprintf(stderr, "invalid cpu %d\n", cpu);
+ exit(1);
+ }
+
+ if (CPU_ISSET_S(cpu, cpu_affinity_setsize, cpu_affinity_set)) {
+ fprintf(stderr, "can't bind to cpu %d more than once\n", cpu);
+ exit(1);
+ }
+
+ /* add cpu to set */
+ CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
+
+ if (verbose)
+ printf("%d, ", cpu);
+
+ num_added += 1;
+
+ return num_added;
+}
+
+
+int
+parse_bind_cpu_list(char *cpu_list)
+{
+ char *p;
+ int range_next = -1;
+ int total_cpus_added = 0;
+
+ allocate_cpusets();
+
+ for(p = cpu_list; *p != '\0'; ) {
+ int num, retval;
+
+ /* remaining list must start w/ valid cpu number */
+
+ if (!isdigit(*p))
+ parse_error(p, *p);
+
+ retval = sscanf(p, "%u", &num);
+ if (retval == EOF)
+ usage();
+ else if (retval == 0)
+ parse_error(p, *p);
+
+ if (range_next >= 0) {
+ if (num <= range_next) /* range must be low to high */
+ parse_error(p, *p);
+
+ for ( ; range_next < num; range_next++)
+ total_cpus_added = add_cpu_to_bind_mask(range_next);
+
+ range_next = -1;
+ }
+
+ total_cpus_added = add_cpu_to_bind_mask(num);
+
+ while (isdigit(*p))
+ p++;
+
+ switch (*p) {
+ case ',':
+ p++;
+ continue;
+ case '-':
+ range_next = num + 1;
+ p++;
+ continue;
+ }
+
+ }
+ return total_cpus_added;
+}
+
+int parse_memory_param(char *p)
+{
+ int bytes;
+ char units;
+
+ if (2 != sscanf(p, "%d%c", &bytes, &units)) {
+ fprintf(stderr, "failed to parse -m\n");
+ usage();
+ }
+ switch (units) {
+ case 'b':
+ case 'B':
+ break;
+ case 'k':
+ case 'K':
+ bytes *= 1024;
+ break;
+ case 'm':
+ case 'M':
+ bytes *= 1024*1024;
+ break;
+ case 'g':
+ case 'G':
+ bytes *= 1024*1024*1024;
+ break;
+ default:
+ fprintf(stderr, "-m: bad memory units, use b, k, m, g\n");
+
+ }
+ return bytes;
+
+}
+
+void parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ nrcpus = get_num_cpus();
+
+ while ((opt = getopt(argc, argv, "s:i:t:b:m:v")) != -1) {
+ switch (opt) {
+ case 's':
+ sec_per_interval = atoi(optarg);
+ if (verbose)
+ printf("sec_per_interval %d\n", sec_per_interval);
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ if (verbose)
+ printf("iterations %d\n", iterations);
+ break;
+ case 't':
+ thread_num_override = atoi(optarg);
+ if (verbose)
+ printf("Thread Count Override: %d\n", thread_num_override);
+ break;
+ case 'b':
+ do_binding = parse_bind_cpu_list(optarg);
+ if (verbose)
+ printf("Binding to %d CPUs.\n", do_binding);
+ break;
+ case 'm':
+ data_bytes = parse_memory_param(optarg);
+ if (verbose)
+ printf("Memory Override: %d\n", data_bytes);
+ break;
+ case 'v':
+ verbose++;
+ break;
+ default: /* '?' */
+ usage(); /* does not return */
+ }
+ }
+}
+
+unsigned long long lsum_old;
+
+
+struct thread_info *tinfo;
+pthread_attr_t attr;
+
+void create_threads()
+{
+ int s, tnum;
+
+ if (thread_num_override)
+ num_threads = thread_num_override;
+ else if (do_binding)
+ num_threads = do_binding;
+ else
+ num_threads = nrcpus;
+
+ thread_data = calloc(num_threads, sizeof(struct padded));
+ if (thread_data == NULL)
+ handle_error("calloc");
+
+ /* Initialize thread creation attributes */
+
+ s = pthread_attr_init(&attr);
+ if (s != 0)
+ handle_error_en(s, "pthread_attr_init");
+
+ /* Allocate memory for pthread_create() arguments */
+
+ tinfo = calloc(num_threads, sizeof(struct thread_info));
+ if (tinfo == NULL)
+ handle_error("calloc");
+
+ for (tnum = 0; tnum < num_threads; tnum++) {
+ tinfo[tnum].thread_num = tnum;
+
+ /* The pthread_create() call stores the thread ID into
+ * corresponding element of tinfo[]
+ */
+
+ s = pthread_create(&tinfo[tnum].thread_id, &attr,
+ &spin_loop, &tinfo[tnum]);
+ if (s != 0)
+ handle_error_en(s, "pthread_create");
+ }
+ printf("%d threads created\n", num_threads);
+ return;
+}
+
+
+void monitor_threads()
+{
+ struct timespec ts;
+ struct timeval tv_old, tv_new, tv_delta;
+ int i, j;
+ double interval_float;
+ unsigned long long lsum;
+
+ ts.tv_sec = sec_per_interval;
+ ts.tv_nsec = 0;
+ gettimeofday(&tv_old, (struct timezone *)NULL);
+
+ for (i = 0; iterations ? i < iterations : 1 ; i++) {
+
+ if (nanosleep(&ts, NULL) != 0) {
+ perror("nanosleep");
+ exit(-1);
+ }
+
+ for (j = 0, lsum = 0; j < num_threads; ++j)
+ lsum += thread_data[j].counter;
+
+ gettimeofday(&tv_new, NULL);
+ timersub(&tv_new, &tv_old, &tv_delta);
+
+ interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+ printf("%.2f\n", (lsum - lsum_old)/interval_float/1000000);
+
+ tv_old = tv_new;
+ lsum_old = lsum;
+ }
+ /* summary */
+ for (j = 0, lsum = 0; j < num_threads; ++j) {
+ printf("%d %.2f\n", j, thread_data[j].counter/1000000.0);
+ lsum += thread_data[j].counter;
+ }
+ printf("Total %.2f\n", lsum/1000000.0);
+
+}
+
+
+void print_banner()
+{
+ puts(BANNER);
+}
+
+int main(int argc, char *argv[])
+{
+ parse_args(argc, argv);
+
+
+ print_banner();
+
+ bind_to_cpus();
+
+ create_threads();
+
+ monitor_threads(); /* never returns */
+
+ return 0;
+}