#!/usr/bin/env python

import pickle
from alchemy import *
import numpy as np
from statsmodels.tools.tools import ECDF
import statsmodels as sm # recommended import according to the docs
import matplotlib.pyplot as plt
from collections import defaultdict

# Filesystem names; each one is used as a component of the input file names
# and as the legend label of its curve.
filesystems = [
    'ext4',
    'ext2',
    #'reiserfs',
    'xfs',
    'btrfs',
]

# Host names; each one is used as a component of the input file names.
hosts = [
    'baal',
    'abaddon',
    'lilith',
]

# Workload kind; part of every input file name and of the output figure name.
run_type = 'read'

# Maps filesystem name -> flat list of seek distances (filled in below).
seek_distances = dict()

# Collect the seek distances for each filesystem, pooling the samples from
# every host and trial into one flat list per filesystem.
for fs in filesystems:
    seek_distances[fs] = []
    # Only trials 5, 6 and 7 are read.
    for trial in [5, 6, 7]:
        for host in hosts:
            # Parenthesised single-argument form: prints the same text under
            # Python 2 and is valid syntax under Python 3.
            print('doing %s, %s, %d' % (fs, host, trial))
            filename = 'seek_distances/%s.%s.%s.%d' % (fs, host, run_type, trial)
            # NOTE: unpickling can execute arbitrary code; only run this on
            # trace files from a trusted source.
            with open(filename, 'rb') as f:
                seek_distances[fs].extend(pickle.load(f))

# Plot one empirical CDF curve per filesystem on a single small figure and
# save it as '<run_type>-cdfs.eps'.
fig = plt.figure(figsize=(3,2))
plt.rcParams.update({'font.size' : 6})

# items() is available on both Python 2 and Python 3 dicts, whereas
# iteritems() exists only on Python 2; iteration order is the same.
for fs, diffs in seek_distances.items():
    ecdf = ECDF(diffs)
    # np.linspace defaults to 50 evenly spaced points, so each curve is
    # sampled at 50 positions between the smallest and largest distance.
    x = np.linspace(min(diffs), max(diffs))
    y = ecdf(x)
    plt.step(x, y, label=fs)

plt.xlabel('Seek distance')
plt.ylabel('CDF')
plt.legend(loc='lower right')
# Extra bottom/left margin for the axis labels.
plt.gcf().subplots_adjust(bottom=0.15, left=0.15)
plt.savefig(run_type + '-cdfs.eps')
