https://github.com/magnusmorton/trace-analysis
Tip revision: 4645af99638edea16d00e811c922b0fb9d6b86d9 authored by Magnus Morton on 11 January 2016, 20:33:10 UTC
subplots and recording output
subplots and recording output
Tip revision: 4645af9
graphing.py
import argparse
import sys
from itertools import izip
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import numpy as np
import trace_parser
import trace as trace_utils
import string
import pdb
import search
dot = lambda x,y: sum(a*b for a,b in izip(x,y))
def produce_gnuplot_file(costs, times, names):
k_stats(costs, times, names)
with open("whole_program.dat", "w") as f:
for cost, time, name in izip(costs, times, names):
f.write(str(cost) + " " + str(time) + " " + name + " " + str(time / cost) + "\n")
def k_stats(costs, times, names):
ks = np.array([time/cost for cost, time in izip(costs, times)])
print "Mean: " + str(np.mean(ks)) + " STD DEV: " + str(np.std(ks))
def graph_residual(costs0, costsc, costsw, times, names):
width = 0.2333
ind = np.arange(len(names))
fn0 = np.poly1d(np.polyfit(costs0,times, 1))
fnc = np.poly1d(np.polyfit(costsc,times, 1))
fnw = np.poly1d(np.polyfit(costsw,times, 1))
res0 = np.subtract(times, fn0(costs0))
resc = np.subtract(times, fnc(costsc))
resw = np.subtract(times, fnw(costsw))
rects0 = plt.bar(ind, res0, width, color='r', hatch='/')
rectsc = plt.bar(ind+width, resc, width, color='g', hatch='-')
rectsw = plt.bar(ind+ 2*width, resw, width, color='b', hatch='\\')
plt.ylabel("Residual")
plt.xlabel("Benchmark")
plt.title("Residuals for each benchmark")
tick_names = [string.replace(name, "generic", "gen") for name in names]
plt.xticks(ind + 1.5*width, tick_names, rotation=20, ha = 'right')
outliers = ["fibfp", "heapsort", "ack", "divrec", "fib", "lattice", "trav2", "tak"]
plt.legend((rects0[0], rectsc[0], rectsw[0]), ("CM0", "CMC", "CMW"), title="Cost Model")
for rect0,rectc,rectw, name in izip(rects0,rectsc,rectsw, names):
if name in outliers:
rect0.set(hatch='*', alpha=0.50)
rectc.set(hatch='*', alpha=0.50)
rectw.set(hatch='*', alpha=0.50)
plt.show()
def residual_graph(filenames):
cm0 = [0,0,0,0,0]
cmc = [1,1,1,1,1]
cmw = [15.07, 2.43, 42.14, 709.79,1]
average_times = trace_parser.calculate_average_times()
programs = trace_parser.parse_files(filenames)
counts = {program.name: program.class_counts() for program in programs}
trace_utils.Fragment.model = cm0
costsc = [dot(counts[program.name], cmc) for program in programs]
costsw = [dot(counts[program.name], cmw) for program in programs]
costs0 = [program.cost() for program in programs]
times = [average_times[program.name] for program in programs]
names = [program.name for program in programs]
graph_residual(costs0, costsc,costsw, times, names)
sys.exit(0)
def rsquared(coeffs, x,y ):
# Polynomial Coefficients
results = {}
results['polynomial'] = coeffs.tolist()
# r-squared
p = np.poly1d(coeffs)
# fit values, and mean
yhat = p(x) # or [p(z) for z in x]
ybar = np.sum(y)/len(y) # or sum(y)/len(y)
ssreg = np.sum((yhat-ybar)**2) # or sum([ (yihat - ybar)**2 for yihat in yhat])
sstot = np.sum((y - ybar)**2) # or sum([ (yi - ybar)**2 for yi in y])
results['determination'] = ssreg / sstot
return results
def graph(costs, times, names, model):
outliers = ["fibfp", "heapsort", "ack", "divrec", "fib", "lattice", "trav2", "tak"]
filtered_names = [name for name in names if name not in outliers]
filtered_costs = [ cost for cost,name in izip(costs, names) if name in filtered_names]
filtered_times = [time for time, name in izip(times, names) if name in filtered_names]
outlier_costs = [cost for cost, name in izip(costs, names) if name in outliers]
outlier_times = [time for time, name in izip(times, names) if name in outliers]
coeffs = np.polyfit(filtered_costs, filtered_times, 1)
fit_fn = np.poly1d(coeffs)
k_stats(costs, times, names)
print fit_fn
print "rsquared"
print rsquared(coeffs, filtered_costs, filtered_times)
plt.ylabel("Execution time ($\mu s$)")
plt.xlabel("Cost")
plt.title("Whole program plot for " + model.upper())
plt.plot( filtered_costs, filtered_times, 'xg', label="Points included in fit" )
plt.plot(filtered_costs, fit_fn(filtered_costs), '-b')
plt.plot( outlier_costs, outlier_times, 'or', label="Points excluded by subsampling")
plt.legend()
plt.show()
def superimpose(costs1, costs2, times,names):
axes = [plt, plt.twiny()]
colors = ('g', 'b')
offsets = (20,-20)
for ax, color, costs, offset in izip(axes, colors, [costs1,costs2], offsets):
#parameter, covariance_matrix = curve_fit(line_func, times, costs)
m, b = np.polyfit(costs, times, 1)
fit_fn = np.poly1d((m,b))
ax.plot( costs[:10], times[:10], 'o' + color, costs, fit_fn(costs), '-' + color)
print fit_fn
for name, x,y in izip(names[:10], costs[:10], times[:10]):
plt.annotate(
name,
xy =(x,y),
xytext =(20,offset),
textcoords = 'offset points', ha = 'left', va = 'bottom',
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
#ax.plot(x, line_func(x, *parameter), color=color)
plt.show()
def line_func(x, a, b):
return a*x + b
def super_graph(filenames):
cm0 = [0,0,0,0,0]
cmc = [1,1,1,1,1]
cmw = [211,34,590,9937,14]
average_times = trace_parser.calculate_average_times()
programs = trace_parser.parse_files(filenames)
counts = {program.name: program.class_counts() for program in programs}
trace_utils.Fragment.model = cm0
costsc = [dot(counts[program.name], cmc) for program in programs]
costsw = [dot(counts[program.name], cmw) for program in programs]
costs0 = [program.cost() for program in programs]
times = [average_times[program.name] for program in programs]
names = [program.name for program in programs]
superimpose(costsc,costsw, times, names)
sys.exit(0)
def unfiltered_graph(costs, times):
coeffs = np.polyfit(costs, times,1)
fit_fn = np.poly1d(coeffs)
print fit_fn
print "rsquared", search.fit(costs, times)
plt.ylabel("Execution time ($\\mu s$)")
plt.title("Plot using parameters found through linear regression")
plt.xlabel("Cost")
plt.plot(costs, times, 'xg')
plt.plot(costs, fit_fn(costs), '-b')
#plt.show()
plt.savefig("model_scatter.png")
def main():
parser = argparse.ArgumentParser(description="Run cost analysis")
parser.add_argument("filenames", metavar="<file>", nargs = '+')
parser.add_argument("--model", "-m", default="cmw")
parser.add_argument( "-k", action='store_true')
parser.add_argument( "-s", action='store_true')
parser.add_argument( "-n", action='store_true')
args = parser.parse_args()
if args.k:
residual_graph(args.filenames)
if args.s:
super_graph(args.filenames)
model = []
if args.model == "cm0":
model = [0,0,0,0,0]
elif args.model == "cmc":
model = [1,1,1,1,1]
elif args.model == "cmw":
model = [211,34,590,9937,14]
else:
model = [float(num) for num in args.model.split(",")]
programs = trace_parser.parse_files(args.filenames)
counts = {program.name: program.class_counts() for program in programs}
average_times = []
for program in programs:
if program.net_time() > 20000000:
print "foo", program.name
if args.n:
times = [program.net_time() for program in programs]
else:
average_times = trace_parser.calculate_average_times()
trace_utils.Fragment.model = model
costs = [dot(counts[program.name], model) for program in programs]
if model == [0,0,0,0,0]:
print "FOOOOOO"
trace_utils.Fragment.model = [0,0,0,0,0,0,0]
costs = [program.cost() for program in programs]
if args.n:
unfiltered_graph(costs, times)
times = [average_times[program.name] for program in programs]
names = [program.name for program in programs]
graph(costs, times, names, args.model)
#produce_gnuplot_file(costs, times,names)
if __name__ == '__main__':
main()