Merge remote-tracking branch 'idle/next'

author: Stephen Rothwell <sfr@canb.auug.org.au> 2013-08-01 12:02:01 +1000
committer: Stephen Rothwell <sfr@canb.auug.org.au> 2013-08-01 12:02:01 +1000
commit: d7c02690314265b425b54e6c4ebf028384f3cf02 (patch)
tree: e2e17d45a1371b9b31a0b9014e10a12305a8f522 /tools
parent: b493633648a132cbcb852210acc2ac5a3e890bdc (diff)
parent: 5c99726b5faaf1ba4ca80c39d319f419c227f803 (diff)
3 files changed, 532 insertions, 0 deletions
diff --git a/tools/power/fspin/Makefile b/tools/power/fspin/Makefile
new file mode 100644
index 000000000000..527400782943
--- /dev/null
+++ b/tools/power/fspin/Makefile
@@ -0,0 +1,21 @@
+# Build and install rules for fspin.
+# CROSS_COMPILE, BUILD_OUTPUT, PREFIX and DESTDIR can be set on the
+# make command line to redirect the toolchain and output locations.
+CC		= $(CROSS_COMPILE)gcc
+BUILD_OUTPUT	:= $(PWD)
+PREFIX		:= /usr
+DESTDIR		:=
+
+fspin : fspin.c
+CFLAGS +=	-Wall
+
+# Compile a single C source into $(BUILD_OUTPUT), linking with pthreads.
+%: %.c
+	@mkdir -p $(BUILD_OUTPUT)
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ -lpthread
+
+# Neither target produces a file of its own name, so mark both phony;
+# otherwise a stray file called "install" or "clean" would suppress the rule.
+.PHONY : clean install
+clean :
+	@rm -f $(BUILD_OUTPUT)/fspin
+
+install : fspin
+	install -d  $(DESTDIR)$(PREFIX)/bin
+	install $(BUILD_OUTPUT)/fspin $(DESTDIR)$(PREFIX)/bin/fspin
+	install -d  $(DESTDIR)$(PREFIX)/share/man/man1
+	install fspin.1 $(DESTDIR)$(PREFIX)/share/man/man1
diff --git a/tools/power/fspin/fspin.1 b/tools/power/fspin/fspin.1
new file mode 100644
index 000000000000..b57308e03b82
--- /dev/null
+++ b/tools/power/fspin/fspin.1
@@ -0,0 +1,68 @@
+.\"  This page Copyright (C) 2013 Len Brown <len.brown@intel.com>
+.\"  Distributed under the GPL, Copyleft 1994.
+.TH FSPIN 1
+.SH NAME
+fspin \- simple workload for power experiments
+.SH SYNOPSIS
+.ft B
+.B fspin
+.RB [ "\-v" ]
+.RB [ "\-i iterations" ]
+.RB [ "\-s sec_per_iteration" ]
+.RB [ "\-t threads" ]
+.RB [ "\-b bind_to_cpus" ]
+.RB [ "\-m memory (b|k|m)" ]
+.br
+.SH DESCRIPTION
+\fBfspin\fP
+heats up the hardware by running a
+floating-point spin loop per processor.
+Every
+.I sec_per_iteration
+seconds, fspin presents the sum of the work completed
+by all threads.
+.SS Options
+.PP
+\fB-v\fP increases verbosity.
+By default, fspin prints only the quantity of work completed.
+.PP
+\fB-s sec_per_iteration\fP
+Print the indicator of work completed every
+sec_per_iteration seconds.  By default, 5 sec.
+.PP
+\fB-t threads\fP
+Create
+.I threads
+software threads.  Default is number of
+logical processors available, or if '-b' option is used,
+one thread per bound processor.
+.PP
+\fB-b bind_to_cpus\fP
+Bind the threads to the indicated list of comma-separated CPU numbers.
+A range of CPUs can be specified by using '-'.
+.PP
+\fB-i iterations\fP
+Exit after
+.I iterations
+and print total of work completed.
+Default is to continue running forever, printing work per iteration/sec.
+.PP
+\fB-m memory\fP
+Allocate arrays of 
+.I memory_size,
+which is followed by a modifier b|k|m, for bytes, kilobytes, or megabytes,
+respectively.  Default is 512 bytes, which will spin in-cache.
+Increase this number to exercise larger caches and memory.
+
+.SH WHAT FSPIN IS NOT
+Fspin is just a simple tool,
+and has not been characterized as a
+.I performance benchmark.
+Fspin is not a
+.I power virus for cooling HW design,
+as there are better tools, specialized for that purpose.
+
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/fspin/fspin.c b/tools/power/fspin/fspin.c
new file mode 100644
index 000000000000..38288c1d61bb
--- /dev/null
+++ b/tools/power/fspin/fspin.c
@@ -0,0 +1,443 @@
+/*
+ * fspin.c - user utility to burn CPU cycles, thrash the cache and memory
+ *
+ * Copyright (c) 2013, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/*
+ * Creates one thread per logical processor (override with -t).
+ * Threads run on any processor (override with -b).
+ * Each thread allocates and initializes its own data.
+ * Then it processes the data using an infinite DAXPY loop:
+ * Double precision Y[i] = A*X[i] + Y[i]
+ *
+ * The parent thread wakes up every reporting interval,
+ * (override 5 sec default with -s),
+ * sums up and prints aggregate performance.
+ *
+ * The actual computation is somewhat arbitrary, if not random.
+ * The performance number is intended only to be compared to itself
+ * on the same machine, to illustrate how various power limiting
+ * techniques impact performance.
+ */
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#define BANNER "fspin v1.1, April 7, 2013 - Len Brown <len.brown@intel.com>"
+
+/* Set errno to 'en', print 'msg' followed by the matching error text, and exit. */
+#define handle_error_en(en, msg) \
+	do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
+
+/* Print 'msg' followed by the error text for the current errno, and exit. */
+#define handle_error(msg) \
+	do { perror(msg); exit(EXIT_FAILURE); } while (0)
+
+/* Per-thread bookkeeping; a pointer to one of these is handed to each thread. */
+struct thread_info {		/* Used as argument to spin_loop() */
+	pthread_t thread_id;	/* ID returned by pthread_create() */
+	int thread_num;		/* Application-defined thread #; index into thread_data[] */
+};
+
+/*
+ * Per-thread work counter, padded so each thread's counter sits in its own
+ * 256-byte slot (assuming 8-byte doubles).  thread_data is the array of
+ * these, one element per thread, allocated in create_threads().
+ */
+struct padded {
+	double counter;	/* 8 bytes */
+	double pad[(32 - 1)];	/* round up to 256 byte line */
+} *thread_data;
+
+int num_threads;		/* number of spin_loop() threads; set in create_threads() */
+int thread_num_override;	/* -t value; 0 means no override */
+int data_bytes = 512;		/* -m value: bytes in each of a thread's two arrays */
+int nrcpus = 64;		/* initial cpu-set size guess; parse_args() stores get_num_cpus() here */
+int sec_per_interval = 5;	/* -s value: seconds between reports */
+int iterations;			/* -i value; 0 means keep reporting forever */
+int verbose;			/* incremented once per -v */
+int do_binding;			/* CPU count returned for -b; non-zero enables bind_to_cpus() */
+
+cpu_set_t *cpu_affinity_set;	/* CPUs named by -b; allocated by allocate_cpusets() */
+size_t cpu_affinity_setsize;	/* size in bytes of cpu_affinity_set */
+
+/*
+ * Allocate the global cpu_affinity_set with room for nrcpus CPUs, record
+ * its size in cpu_affinity_setsize, and clear every CPU in the set.
+ * Exits with status 3 if the allocation fails.
+ */
+void
+allocate_cpusets()
+{
+	cpu_affinity_set = CPU_ALLOC(nrcpus);
+	if (!cpu_affinity_set) {
+		perror("CPU_ALLOC");
+		exit(3);
+	}
+
+	cpu_affinity_setsize = CPU_ALLOC_SIZE(nrcpus);
+
+	/* start from an empty set; CPUs are added one at a time later */
+	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
+}
+
+/*
+ * Restrict the calling thread to the CPUs collected in cpu_affinity_set.
+ * Does nothing unless a bind list was given (do_binding non-zero).
+ * Exits with status -1 if the affinity cannot be applied.
+ */
+void
+bind_to_cpus()
+{
+	int rc;
+
+	if (do_binding == 0)
+		return;
+
+	rc = sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set);
+	if (rc != -1)
+		return;
+
+	fprintf(stderr, "bind_to_cpus() failed\n");
+	perror("sched_setaffinity");
+	exit(-1);
+}
+
+/*
+ * get_num_cpus() - count the CPUs in the calling thread's affinity mask.
+ *
+ * Starts with a mask sized for nrcpus CPUs.  When the kernel rejects the
+ * mask with EINVAL, the global nrcpus is multiplied by four and the query
+ * is retried, until nrcpus reaches (1024 << 8).
+ *
+ * Returns the number of CPUs set in the mask, or -1 (after printing a
+ * diagnostic) if the mask cannot be allocated or queried.
+ */
+int get_num_cpus()
+{
+	for (;;) {
+		cpu_set_t *mask;
+		size_t size;
+		int num_cpus;
+		int saved_errno;
+
+		mask = CPU_ALLOC(nrcpus);
+		if (mask == NULL) {
+			perror("CPU_ALLOC");
+			return -1;
+		}
+		size = CPU_ALLOC_SIZE(nrcpus);
+		CPU_ZERO_S(size, mask);
+
+		if (sched_getaffinity(0, size, mask) != -1) {
+			num_cpus = CPU_COUNT_S(size, mask);
+			CPU_FREE(mask);
+			return num_cpus;
+		}
+
+		/* capture errno before CPU_FREE() gets a chance to change it */
+		saved_errno = errno;
+		CPU_FREE(mask);
+
+		if (saved_errno != EINVAL || nrcpus >= (1024 << 8)) {
+			errno = saved_errno;
+			perror("sched_getaffinity");
+			return -1;
+		}
+
+		/* mask was too small for this kernel: grow it and retry */
+		nrcpus = nrcpus << 2;
+	}
+}
+
+/*
+ * spin_loop() - worker thread body: run a DAXPY loop over private data.
+ * @arg: pointer to this thread's struct thread_info.
+ *
+ * Allocates two private arrays of doubles (data_bytes each, but never
+ * fewer than one element), seeds them, and then loops forever computing
+ * y[i] = a * x[i] + y[i] while i cycles over 0 .. data_entries - 1.
+ * Each element processed increments this thread's thread_data[] counter.
+ *
+ * Exits the process if the arrays cannot be allocated; never returns.
+ */
+static void *spin_loop(void *arg)
+{
+	struct thread_info *tinfo = (struct thread_info *)arg;
+	double *x, *y;
+	int i = 0;
+	int data_entries;
+	unsigned long long bitmask = random();
+
+	/* keep at least one element so the loop below always has valid data */
+	if (data_bytes < (int)sizeof(double))
+		data_entries = 1;
+	else
+		data_entries = data_bytes / sizeof(double);
+
+	x = malloc(data_entries * sizeof(double));
+	y = malloc(data_entries * sizeof(double));
+
+	if (x == NULL || y == NULL) {
+		perror("malloc");
+		exit(-1);
+	}
+
+	/*
+	 * seed data array with random bits
+	 */
+	for (i = 0; i < data_entries; ++i) {
+		x[i] = 1.0 + i * bitmask;
+		y[i] = 1.0 + i * bitmask;
+	}
+
+	for (i = 0; ; ) {
+
+		double a = 3.1415926535 * i;
+
+		y[i] = a * x[i] + y[i];		/* DAXPY */
+
+		thread_data[tinfo->thread_num].counter++;
+
+		/* wrap before the index can step past the last element */
+		if (++i >= data_entries)
+			i = 0;
+	}
+	/* not reached */
+}
+
+/* Print the command-line synopsis to stderr and exit with EXIT_FAILURE. */
+void usage()
+{
+	fputs("Usage: fspin [-v][-s sec_per_iteration][-i iterations][-t num_threads][-b cpu_list][-m memory(b|k|m)]\n",
+	      stderr);
+	fputs("\twhere 'cpu_list' is comma and dash separated numbers with no spaces\n",
+	      stderr);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Report where option parsing failed, then print the usage text and exit.
+ * @string: the unparsed remainder of the argument
+ * @c:      the offending character
+ */
+void parse_error(char *string, char c)
+{
+	fprintf(stderr,
+		"parse error on '%s' at '%c'\n",
+		string, c);
+
+	usage();	/* exits */
+}
+
+/*
+ * add_cpu_to_bind_mask() - add one CPU to the global cpu_affinity_set.
+ * @cpu: CPU number to add; must be in the range 0 .. nrcpus - 1.
+ *
+ * Exits with status 1 if @cpu is out of range or already in the set.
+ * When verbose is set, the CPU number is echoed to stdout.
+ *
+ * Returns the running total of CPUs added across all calls.
+ */
+int add_cpu_to_bind_mask(int cpu) {
+	static int num_added;
+
+	/* the set was sized for nrcpus CPUs, so nrcpus itself is out of range */
+	if (cpu < 0 || cpu >= nrcpus) {
+		fprintf(stderr, "invalid cpu %d\n", cpu);
+		exit(1);
+	}
+
+	if (CPU_ISSET_S(cpu, cpu_affinity_setsize, cpu_affinity_set)) {
+		fprintf(stderr, "can't bind to cpu %d more than once\n", cpu);
+		exit(1);
+	}
+
+	/* add cpu to set */
+	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
+
+	if (verbose)
+		printf("%d, ", cpu);
+
+	num_added += 1;
+
+	return num_added;
+}
+
+
+/*
+ * parse_bind_cpu_list() - parse the -b argument into cpu_affinity_set.
+ * @cpu_list: comma separated CPU numbers and low-high ranges, e.g. "0,2-5".
+ *
+ * Allocates the affinity set, then adds every listed CPU to it.
+ * A range must run from low to high.  Any malformed input, including a
+ * range with no upper bound, prints a diagnostic and exits via usage().
+ *
+ * Returns the total number of CPUs added (0 for an empty list).
+ */
+int
+parse_bind_cpu_list(char *cpu_list)
+{
+	char *p;
+	int range_next = -1;
+	int total_cpus_added = 0;
+
+	allocate_cpusets();
+
+	for (p = cpu_list; *p != '\0'; ) {
+		int num, retval;
+
+		/* remaining list must start w/ valid cpu number */
+
+		if (!isdigit((unsigned char)*p))
+			parse_error(p, *p);
+
+		retval = sscanf(p, "%d", &num);
+		if (retval == EOF)
+			usage();
+		else if (retval == 0)
+			parse_error(p, *p);
+
+		if (range_next >= 0) {
+			/*
+			 * range_next is already low + 1 (low itself was added
+			 * on the previous pass), so high == low + 1 is valid;
+			 * only high <= low is rejected.
+			 */
+			if (num < range_next)
+				parse_error(p, *p);
+
+			for ( ; range_next < num; range_next++)
+				total_cpus_added = add_cpu_to_bind_mask(range_next);
+
+			range_next = -1;
+		}
+
+		total_cpus_added = add_cpu_to_bind_mask(num);
+
+		while (isdigit((unsigned char)*p))
+			p++;
+
+		switch (*p) {
+		case ',':
+			p++;
+			continue;
+		case '-':
+			range_next = num + 1;
+			p++;
+			continue;
+		}
+
+	}
+
+	/* the list ended right after a '-': the range has no upper bound */
+	if (range_next >= 0) {
+		fprintf(stderr, "cpu range is missing its upper bound\n");
+		usage();
+	}
+
+	return total_cpus_added;
+}
+
+/*
+ * parse_memory_param() - parse the -m argument into a byte count.
+ * @p: a decimal number immediately followed by a unit letter:
+ *     b (bytes), k (KiB), m (MiB) or g (GiB), in either case.
+ *
+ * Prints a diagnostic and exits via usage() when the argument cannot be
+ * parsed, the unit is unknown, the size is not positive, or the scaled
+ * size does not fit in an int.
+ *
+ * Returns the requested size in bytes.
+ */
+int parse_memory_param(char *p)
+{
+	int bytes;
+	int scale = 1;
+	char units;
+
+	if (2 != sscanf(p, "%d%c", &bytes, &units)) {
+		fprintf(stderr, "failed to parse -m\n");
+		usage();
+	}
+	switch (units) {
+	case 'b':
+	case 'B':
+		break;
+	case 'k':
+	case 'K':
+		scale = 1024;
+		break;
+	case 'm':
+	case 'M':
+		scale = 1024*1024;
+		break;
+	case 'g':
+	case 'G':
+		scale = 1024*1024*1024;
+		break;
+	default:
+		fprintf(stderr, "-m: bad memory units, use b, k, m, g\n");
+		usage();
+	}
+
+	/* check before multiplying: signed overflow would be undefined */
+	if (bytes <= 0 || bytes > INT_MAX / scale) {
+		fprintf(stderr, "-m: memory size out of range\n");
+		usage();
+	}
+
+	return bytes * scale;
+}
+
+/*
+ * parse_args() - process the command line into the global settings.
+ *
+ * First records the number of available CPUs in nrcpus, exiting if that
+ * cannot be determined.  Then handles:
+ *   -s seconds per reporting interval (must be at least 1)
+ *   -i number of intervals to run (0 means run forever)
+ *   -t thread count override (0 means no override)
+ *   -b list of CPUs to bind to
+ *   -m per-array memory size
+ *   -v increase verbosity (affects only the options that follow it)
+ * Unknown options or out-of-range values print usage and exit.
+ */
+void parse_args(int argc, char *argv[])
+{
+	int opt;
+
+	nrcpus = get_num_cpus();
+	if (nrcpus < 1) {
+		fprintf(stderr, "unable to determine the number of available CPUs\n");
+		exit(EXIT_FAILURE);
+	}
+
+	while ((opt = getopt(argc, argv, "s:i:t:b:m:v")) != -1) {
+		switch (opt) {
+		case 's':
+			sec_per_interval = atoi(optarg);
+			if (sec_per_interval < 1) {
+				fprintf(stderr, "-s: interval must be at least 1 second\n");
+				usage();
+			}
+			if (verbose)
+				printf("sec_per_interval %d\n", sec_per_interval);
+			break;
+		case 'i':
+			iterations = atoi(optarg);
+			if (iterations < 0) {
+				fprintf(stderr, "-i: iterations must not be negative\n");
+				usage();
+			}
+			if (verbose)
+				printf("iterations %d\n", iterations);
+			break;
+		case 't':
+			thread_num_override = atoi(optarg);
+			if (thread_num_override < 0) {
+				fprintf(stderr, "-t: thread count must not be negative\n");
+				usage();
+			}
+			if (verbose)
+				printf("Thread Count Override: %d\n", thread_num_override);
+			break;
+		case 'b':
+			do_binding = parse_bind_cpu_list(optarg);
+			if (verbose)
+				printf("Binding to %d CPUs.\n", do_binding);
+			break;
+		case 'm':
+			data_bytes = parse_memory_param(optarg);
+			if (verbose)
+				printf("Memory Override: %d\n", data_bytes);
+			break;
+		case 'v':
+			verbose++;
+			break;
+		default:	/* '?' */
+			usage();	/* does not return */
+		}
+	}
+}
+
+unsigned long long lsum_old;	/* aggregate count at the previous report; see monitor_threads() */
+
+
+struct thread_info *tinfo;	/* one entry per thread; allocated in create_threads() */
+pthread_attr_t attr;		/* attributes passed to every pthread_create() call */
+
+/*
+ * create_threads() - start the worker threads.
+ *
+ * The thread count is the -t override if given, else the number of bound
+ * CPUs if -b was used, else nrcpus.  Allocates the zeroed per-thread
+ * counters (thread_data) and descriptors (tinfo), then starts one
+ * spin_loop() thread per descriptor.  Any failure prints a diagnostic
+ * and exits.
+ */
+void create_threads()
+{
+	int rc, idx;
+
+	num_threads = thread_num_override ? thread_num_override :
+		      do_binding ? do_binding : nrcpus;
+
+	thread_data = calloc(num_threads, sizeof(*thread_data));
+	if (!thread_data)
+		handle_error("calloc");
+
+	/* default attributes, shared by every thread created below */
+	rc = pthread_attr_init(&attr);
+	if (rc)
+		handle_error_en(rc, "pthread_attr_init");
+
+	/* one descriptor per thread, passed as the thread's argument */
+	tinfo = calloc(num_threads, sizeof(*tinfo));
+	if (!tinfo)
+		handle_error("calloc");
+
+	for (idx = 0; idx < num_threads; idx++) {
+		struct thread_info *ti = &tinfo[idx];
+
+		ti->thread_num = idx;
+
+		/* pthread_create() fills in ti->thread_id on success */
+		rc = pthread_create(&ti->thread_id, &attr, spin_loop, ti);
+		if (rc)
+			handle_error_en(rc, "pthread_create");
+	}
+
+	printf("%d threads created\n", num_threads);
+}
+
+
+/*
+ * monitor_threads() - periodically report aggregate progress.
+ *
+ * Sleeps sec_per_interval seconds, sums every thread's counter, and
+ * prints the increase since the previous report divided by the measured
+ * elapsed time, in millions per second.  Repeats 'iterations' times, or
+ * forever when iterations is 0.  After the last interval, prints each
+ * thread's counter and the grand total, both in millions.
+ * Exits with status -1 if nanosleep() fails.
+ */
+void monitor_threads()
+{
+	struct timespec nap;
+	struct timeval prev, now, elapsed;
+	unsigned long long total;
+	double secs;
+	int pass, t;
+
+	nap.tv_sec = sec_per_interval;
+	nap.tv_nsec = 0;
+	gettimeofday(&prev, (struct timezone *)NULL);
+
+	pass = 0;
+	while (!iterations || pass < iterations) {
+
+		if (nanosleep(&nap, NULL) != 0) {
+			perror("nanosleep");
+			exit(-1);
+		}
+
+		total = 0;
+		for (t = 0; t < num_threads; ++t)
+			total += thread_data[t].counter;
+
+		/* use the measured interval, not the requested one */
+		gettimeofday(&now, NULL);
+		timersub(&now, &prev, &elapsed);
+		secs = elapsed.tv_sec + elapsed.tv_usec/1000000.0;
+
+		printf("%.2f\n", (total - lsum_old)/secs/1000000);
+
+		prev = now;
+		lsum_old = total;
+		pass++;
+	}
+
+	/* summary */
+	total = 0;
+	for (t = 0; t < num_threads; ++t) {
+		printf("%d %.2f\n", t, thread_data[t].counter/1000000.0);
+		total += thread_data[t].counter;
+	}
+	printf("Total %.2f\n", total/1000000.0);
+}
+
+
+/* Print the program name/version banner on its own line on stdout. */
+void print_banner()
+{
+	printf("%s\n", BANNER);
+}
+
+/*
+ * Program entry: parse options, print the banner, apply any CPU binding,
+ * start the worker threads, then report progress.  monitor_threads()
+ * comes back only when an iteration limit was given with -i.
+ */
+int main(int argc, char *argv[])
+{
+	parse_args(argc, argv);
+	print_banner();
+	bind_to_cpus();
+	create_threads();
+	monitor_threads();
+
+	return 0;
+}
author	Stephen Rothwell <sfr@canb.auug.org.au>	2013-08-01 12:02:01 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2013-08-01 12:02:01 +1000
commit	d7c02690314265b425b54e6c4ebf028384f3cf02 (patch)
tree	e2e17d45a1371b9b31a0b9014e10a12305a8f522 /tools
parent	b493633648a132cbcb852210acc2ac5a3e890bdc (diff)
parent	5c99726b5faaf1ba4ca80c39d319f419c227f803 (diff)