/*
 * test_multiplex2.c - test time-based set multiplexing 
 *
 * Copyright (c) 2008 Google, Inc
 * Contributed by Stephane Eranian <eranian@google.com>
 *
 * Based on:
 * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* lroundf*/
#endif
#include <sys/types.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <setjmp.h>
#include <fcntl.h>
#include <time.h>
#include <sys/wait.h>
#include <sys/ptrace.h>

#include <perfmon/pfmlib.h>
#include <perfmon/perfmon.h>

#include "detect_pmcs.h"
#include "pfm_tests.h"

/* number of event sets created and multiplexed by the test (size of allsets[]) */
#define NUM_SETS 2

/* requested set switching frequency, in Hz; converted to a period in do_multiplex1() */
#define SMPL_FREQ_IN_HZ	100

/* capacity of the per-set pc[] and pd[] argument arrays in event_set_t */
#define NUM_PMCS 64
#define NUM_PMDS 64

/*
 * run parameters, filled in by do_multiplex1()
 */
typedef struct {
	/* switching frequency in Hz: SMPL_FREQ_IN_HZ, then recomputed from the final period */
	uint64_t	smpl_freq_hz;
	/* switching period in ns, rounded up to a multiple of clock_res; used as set timeout */
	uint64_t	smpl_freq_ns;
	/* CLOCK_MONOTONIC resolution in ns, as reported by clock_getres() */
	uint64_t	clock_res;
	/* value returned by get_cpu_speed() */
	unsigned long	cpu_mhz;
} program_options_t;

/*
 * everything needed to program and read back one event set
 */
typedef struct _event_set_t {
	/* NOTE(review): not written or read in the visible code */
	uint16_t id;
	/* input and output parameters of pfm_dispatch_events() */
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	/* PMC values passed to pfm_write_pmcs() */
	pfarg_pmc_t	pc[NUM_PMCS];
	/* PMD descriptors passed to pfm_write_pmds() and pfm_read_pmds() */
	pfarg_pmd_t	pd[NUM_PMDS];
	/* set description (id, flags, timeout) passed to pfm_create_evtsets() */
	pfarg_setdesc_t setd;
	/* per-set statistics (runs, active duration) filled by pfm_getinfo_evtsets() */
	pfarg_setinfo_t setinfo;
} event_set_t;

/* NOTE(review): not referenced in the visible code; context ids are plain int below */
typedef int	pfm_ctxid_t;

/* the event sets, prepared by generate_default_sets() and indexed by set id */
static event_set_t allsets[NUM_SETS];

/* run parameters shared by the functions of this test */
static program_options_t options;
/*
 * Return the CPU speed in MHz as reported by /proc/cpuinfo.
 *
 * The first "cpu MHz" entry that parses to a positive number is used.
 * When there is none, the last "BogoMIPS" entry that parses is returned
 * instead. Returns 0 when the file cannot be opened or when no usable
 * entry is found.
 *
 * unreliable for CPU with variable clock speed
 */
static unsigned long
get_cpu_speed(void)
{
	FILE *fp1;
	unsigned long f1 = 0, f2 = 0;
	char buffer[128], *p;
	float fl;

	fp1 = fopen("/proc/cpuinfo", "r");
	if (fp1 == NULL)
		return 0;

	while (fgets(buffer, sizeof(buffer), fp1) != NULL) {
		/*
		 * Blank lines and the tail chunks of lines longer than the
		 * buffer (e.g. "flags") have no "key : value" separator.
		 * Skip them instead of giving up, otherwise entries located
		 * after such a line are never seen.
		 */
		p = strchr(buffer, ':');
		if (p == NULL)
			continue;

		/* split: buffer holds the key, p points right after ':' */
		*p++ = '\0';

		/*
		 * sscanf() skips the leading blanks and stops at the trailing
		 * newline by itself. Its return value must be checked: on a
		 * failed conversion fl is left untouched.
		 */
		if (!strncasecmp("cpu MHz", buffer, 7)) {
			if (sscanf(p, "%f", &fl) == 1 && fl > 0.0f) {
				f1 = (unsigned long)lroundf(fl);
				break;
			}
		} else if (!strncasecmp("BogoMIPS", buffer, 8)) {
			if (sscanf(p, "%f", &fl) == 1 && fl > 0.0f)
				f2 = (unsigned long)lroundf(fl);
		}
	}
	fclose(fp1);
	return f1 == 0 ? f2 : f1;
}

/*
 * Body of the forked child: request tracing, then exec the TEST_PRG helper
 * taken from the same directory as cmdline.
 *
 * cmdline: path of the running test program (the caller passes *argv); its
 *          directory part, if any, is prepended to TEST_PRG
 *
 * Does not return: either execl() replaces the process image, or the
 * process exits with status 1 (allocation or exec failure).
 */
static int
child(char *cmdline)
{
#define TEST_PRG "noploop"
	size_t dirlen, len;
	char *buf, *p;

	/* length of the directory prefix, trailing '/' included (0 if none) */
	p = strrchr(cmdline, '/');
	dirlen = p ? (size_t)(p - cmdline) + 1 : 0;

	/* sizeof(TEST_PRG) already counts the terminating NUL */
	len = dirlen + sizeof(TEST_PRG);

	buf = malloc(len);
	if (!buf) {
		PFM_LOG("cannot allocate memory for %zu bytes for string", len);
		exit(1);
	}

	/*
	 * make sure we grab TEST_PRG from the same dir used for this process.
	 * The copies are length-bounded and the second one brings the NUL,
	 * so buf is always a properly terminated string.
	 */
	memcpy(buf, cmdline, dirlen);
	memcpy(buf + dirlen, TEST_PRG, sizeof(TEST_PRG));

	/*
	 * force the task to stop before executing the first
	 * user level instruction
	 */
	ptrace(PTRACE_TRACEME, 0, NULL, NULL);
	close(0); close(1); close(2);
	/* the variadic sentinel must be a null pointer of type char * */
	execl(buf, buf, "10", (char *)NULL);
	/* not reached */
	exit(1);
}

static int
check_results(int ctxid)
{
	unsigned int i, j;
	uint64_t value, tot_runs = 0;
	uint64_t tot_dur = 0, c;
	int ret;

	for(i=0; i < NUM_SETS; i++) {
		ret = pfm_read_pmds(ctxid, allsets[i].pd, allsets[i].outp.pfp_pmd_count);
		if (ret == -1) {
			PFM_LOG("pfm_read_pmds error: %s", strerror(errno));
			goto error;
		}
		ret = pfm_getinfo_evtsets(ctxid, &allsets[i].setinfo, 1);
		if (ret == -1) {
			PFM_LOG("cannot get set%u info: %s", allsets[i].setinfo.set_id, strerror(errno));
			return -1;
		}
	}
	/*
	 * compute average number of runs
	 *
	 * the number of runs per set can be at most off by 1 between all sets
	 */
	for (i=0; i < NUM_SETS; i++) {
		if (allsets[i].setinfo.set_runs == 0) {
			PFM_LOG("not enough runs to collect meaningful results: set%u did not run", allsets[i].setinfo.set_id);
			return -1;
		}
		if (allsets[i].setinfo.set_act_duration == 0) {
			PFM_LOG("active duration is 0 for set%u", allsets[i].setinfo.set_id);
			return -1;
		}
		tot_runs += allsets[i].setinfo.set_runs;
		tot_dur  += allsets[i].setinfo.set_act_duration;
	}

	for (i=0 ; i < NUM_SETS; i++) {
		for(j=0; j < allsets[i].outp.pfp_pmd_count; j++) {
			value = allsets[i].pd[j].reg_value;
			if (value == 0) {
				PFM_LOG("invalid value 0, set%u PMD%u", allsets[i].pd[j].reg_set, allsets[i].pd[j].reg_num);
				return -1;
			}

			/* 
			 * scaling
			 * We use duration rather than number of runs to compute a more precise
			 * scaled value. This avoids overcounting when the last set only partially
			 * ran.
			 *
			 * We use double to avoid overflowing of the 64-bit count in case of very
			 * large total duration
			 */
			c = llround(((double)value*tot_dur)/(double)allsets[i].setinfo.set_act_duration);
			if (c == 0) {
				PFM_LOG("invalid scaled count 0, set%u PMD%u", allsets[i].pd[j].reg_set, allsets[i].pd[j].reg_num);
				return -1;
			}
		}
		//PFM_LOG("act=%"PRIu64, allsets[i].setinfo.set_runs);
	}
	return 0;
error:
	return -1;
}

/*
 * Create a perfmon context holding all event sets, attach it to a freshly
 * forked child running the helper program (see child()), let the child
 * run to completion and check the collected results.
 *
 * cmdline: path of the running test program, handed to child()
 *
 * Returns the result of check_results() (0 on success), or -1 on any
 * setup, attach or child failure. The context is closed and the child is
 * killed and reaped, if still around, before returning.
 */
static int
measure_one_task(char *cmdline)
{
	int ctxid, i;
	pfarg_ctx_t ctx[1];
	pfarg_load_t load_arg;
	pid_t pid = 0;
	int status = 0, ret;

	memset(ctx, 0, sizeof(ctx));
	memset(&load_arg, 0, sizeof(load_arg));

	/*
	 * create the context
	 */
	ctxid = pfm_create_context(ctx, NULL, NULL, 0);
	if (ctxid == -1 ) {
		PFM_LOG("cannot create PFM context: %s", strerror(errno));
		return -1;
	}

	/*
	 * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e.,
	 * fd not visible to child.
	 */
	if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) {
		PFM_LOG("cannot set CLOEXEC: %s", strerror(errno));
		goto error;
	}

	/* create each set, then program its PMCs and PMDs */
	for(i=0; i < NUM_SETS; i++) {
		if (pfm_create_evtsets(ctxid, &allsets[i].setd, 1)) {
			PFM_LOG("cannot create set%u: %s", allsets[i].setd.set_id, strerror(errno));
			goto error;
		}
		if (pfm_write_pmcs(ctxid, allsets[i].pc, allsets[i].outp.pfp_pmc_count) == -1) {
			PFM_LOG("pfm_write_pmcs error: %s", strerror(errno));
			goto error;
		}

		if (pfm_write_pmds(ctxid, allsets[i].pd, allsets[i].outp.pfp_pmd_count) == -1) {
			PFM_LOG("pfm_write_pmds error: %s", strerror(errno));
			goto error;
		}
	}

	pid = fork();
	if (pid == -1) {
		PFM_LOG("Cannot fork process");
		/*
		 * there is no child: clear pid so that the cleanup code can
		 * never end up calling kill(-1, SIGKILL)
		 */
		pid = 0;
		goto error;
	}

	if (pid == 0)
		exit(child(cmdline));

	/* wait for the child to stop on exec (it asked to be traced) */
	ret = waitpid(pid, &status, WUNTRACED);
	if (ret != pid) {
		/* status is not valid here; the child may still exist */
		PFM_LOG("waitpid error: %s", strerror(errno));
		goto error;
	}
	if (!WIFSTOPPED(status)) {
		PFM_LOG("child process exited with errors: %d", WEXITSTATUS(status));
		/* already reaped: do not signal or wait for this pid again */
		pid = 0;
		goto error;
	}

	/*
	 * now attach the context
	 */
	load_arg.load_pid = pid;
	if (pfm_load_context(ctxid, &load_arg) == -1) {
		PFM_LOG("pfm_load_context error: %s", strerror(errno));
		goto error;
	}

	/*
	 * start monitoring
	 */
	if (pfm_start(ctxid, NULL) == -1) {
		PFM_LOG("pfm_start error: %s", strerror(errno));
		goto error;
	}

	/* actually start process; a failed detach would leave it stopped forever */
	if (ptrace(PTRACE_DETACH, pid, NULL, 0) == -1) {
		PFM_LOG("ptrace detach error: %s", strerror(errno));
		goto error;
	}

	/*
	 * wait for termination. Only a normal exit with status 0 counts as
	 * success: for a child killed by a signal WEXITSTATUS() is
	 * meaningless (and typically 0), hence the WIFEXITED() test.
	 */
	ret = waitpid(pid, &status, 0);
	if (ret != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
		PFM_LOG("child process exited with errors ret=%d pid=%d: %d", ret, pid, WEXITSTATUS(status));
		/* once reaped, the pid must not be signaled or waited for */
		if (ret == pid)
			pid = 0;
		goto error;
	}
	ret = check_results(ctxid);

	close(ctxid);

	return ret;
error:
	/* pid > 0 only while a child of ours exists and has not been reaped */
	if (pid > 0) {
		kill(pid, SIGKILL);
		waitpid(pid, &status, 0);
	}
	close(ctxid);
	return -1;
}

static int
generate_default_sets(void)
{
	unsigned int i, j;
	pfmlib_regmask_t unavail;
	int ret;

	detect_unavail_pmcs(-1, &unavail);

	for (i=0; i < NUM_SETS; i++) {
		allsets[i].inp.pfp_dfl_plm = PFM_PLM3;

		allsets[i].inp.pfp_event_count = 1;

		/* alternate cycles and instructions retired per set */
		if (i & 1)
			ret = pfm_get_inst_retired_event(&allsets[i].inp.pfp_events[0]);
		else
			ret = pfm_get_cycle_event(&allsets[i].inp.pfp_events[0]);

		if (ret != PFMLIB_SUCCESS) {
			PFM_LOG("cannot find cycle event: %s", pfm_strerror(ret));
			return -1;
		}
		allsets[i].inp.pfp_unavail_pmcs = unavail;

		ret = pfm_dispatch_events(&allsets[i].inp, NULL, &allsets[i].outp, NULL);
		if (ret != PFMLIB_SUCCESS) {
			PFM_LOG("failed dispatch_events: %s", pfm_strerror(ret));
			return -1;
		}

		for (j=0; j < allsets[i].outp.pfp_pmc_count; j++) {
			allsets[i].pc[j].reg_set = i;
			allsets[i].pc[j].reg_num   = allsets[i].outp.pfp_pmcs[j].reg_num;
			allsets[i].pc[j].reg_value = allsets[i].outp.pfp_pmcs[j].reg_value;
		}

		for (j=0; j < allsets[i].outp.pfp_pmd_count; j++) {
			allsets[i].pd[j].reg_num = allsets[i].outp.pfp_pmds[j].reg_num;
			allsets[i].pd[j].reg_set = i;
		}
		allsets[i].setd.set_id = i;
		allsets[i].setd.set_flags    = PFM_SETFL_TIME_SWITCH;
		allsets[i].setd.set_timeout  = options.smpl_freq_ns;
		allsets[i].setinfo.set_id = i;
	}
	return 0;
}

/*
 * test timeout-based event set multiplexing
 *
 * cmdline: path of the running test program, used to locate the helper
 *          program to measure
 *
 * Computes the set switching period (SMPL_FREQ_IN_HZ rounded up to a
 * multiple of the CLOCK_MONOTONIC resolution), prepares the event sets and
 * runs the measurement.
 *
 * Returns 0 on success, -1 on failure, -2 when pfmlib_ok is 0 (reported as
 * SKIP by main_multiplex1()).
 */
int
do_multiplex1(char *cmdline)
{
	struct timespec ts;
	uint64_t f_ns, d, f_final;

	if (pfmlib_ok == 0)
		return -2;

	if ((options.cpu_mhz = get_cpu_speed()) == 0) {
		PFM_LOG("cannot get CPU speed");
		return -1;
	}
	/*
	 * extract kernel clock resolution
	 */
	if (clock_getres(CLOCK_MONOTONIC, &ts) == -1) {
		PFM_LOG("cannot get clock resolution: %s", strerror(errno));
		return -1;
	}
	/* widen before multiplying: time_t may be narrower than 64 bits */
	options.clock_res = (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;

	/* the resolution is used as a divisor below */
	if (options.clock_res == 0) {
		PFM_LOG("invalid clock resolution 0");
		return -1;
	}

	options.smpl_freq_hz = SMPL_FREQ_IN_HZ;

	/*
	 * adjust frequency to be a multiple of clock resolution
	 * otherwise kernel will fail pfm_create_evtsets()
	 */

	/*
	 * f_ns = run period in ns (1s/hz)
	 */
	f_ns = 1000000000 / options.smpl_freq_hz;

	/* round up period in nanoseconds */
	d = (f_ns+options.clock_res-1) / options.clock_res;

	/* final period (multiple of clock_res) */
	f_final = d * options.clock_res;

	/* cannot get best precision without high_res timers
	 * but that's ok for this test, we care about noticing set switching
	 */
	if (f_ns != f_final) {
		PFM_LOG("warning not getting the expected frequency due to kernel/hw limitation (high_res_timers may be disabled");
	}

	/* adjust period */
	options.smpl_freq_ns = f_final;

	/* not used */
	options.smpl_freq_hz = 1000000000 / f_final;

	/* do not measure with event sets that could not be set up */
	if (generate_default_sets() == -1)
		return -1;

	return measure_one_task(cmdline);
}

/*
 * Entry point of the test: run do_multiplex1() with the first argument
 * string and print the verdict next to the test name.
 *
 * argc: unused
 * argv: argv[0] is passed to do_multiplex1()
 *
 * Returns the value of do_multiplex1(): -1 is printed as FAIL, -2 as SKIP,
 * anything else as PASS.
 */
int
main_multiplex1(int argc, char **argv)
{
	const char *verdict;
	int ret;

	/* show the test name right away, the measurement takes a while */
	printf("multiplex1.test0  ");
	fflush(stdout);

	ret = do_multiplex1(*argv);

	if (ret == -1)
		verdict = "FAIL";
	else if (ret == -2)
		verdict = "SKIP";
	else
		verdict = "PASS";

	printf("[%s]\n", verdict);
	return ret;
}

