#!/usr/bin/env python

import os, sys, re, sh
from alchemy import Trial, Trace, get_session, initialize
from sys import stderr

# Filesystems under test.  Each name selects a ``blktrace/<fs>/`` directory
# in compile_traces and a ``<run_type>/<fs>.<trial>`` log in agg_runs.
filesystems = [
    'ext4',
    'ext2',
    'xfs',
    'btrfs',
    'reiserfs',
]

# Kinds of run; used as a directory name by agg_runs and as a file name
# component by compile_traces.
run_types = ['read', 'write']

# Host names that appear as the first component of each blktrace file name.
hosts = ['baal', 'abaddon', 'lilith']

# (filesystem, run_type, trial_number) combinations that agg_runs skips.
blacklist = [
    ('ext2', 'read', 0),
    ('xfs', 'read', 1),
]

# A trace record begins with a "major,minor" device pair.  The leading
# whitespace is optional: blkparse appears to right-align the major number
# in a three-character field, so a major >= 100 (e.g. 253) starts at
# column 0 and would be rejected by a pattern that insists on padding.
_TRACE_LINE_RE = re.compile(r'\s*\d+,\d+')


class _TraceRecord(object):
    """Plain attribute holder for the fields of one parsed trace line."""


def parse_line(line):
    """Parse one line of blkparse output into a record object.

    Lines that do not start with a ``major,minor`` device pair (after
    optional whitespace) are not trace records and yield ``None``.

    Args:
        line: a single line of text.

    Returns:
        ``None`` for a non-record line, otherwise an object with these
        attributes taken from the whitespace-separated fields in order:
        ``device_numbers`` (str), ``cpu_id`` (int), ``sequence_number``
        (int), ``ts`` (float), ``process_id`` (int), ``action`` (str),
        ``rwdbs`` (str) and ``output`` (list of the remaining fields).

    Raises:
        IndexError: the line looks like a record but has fewer than seven
            fields.
        ValueError: a numeric field cannot be converted.
    """
    if not _TRACE_LINE_RE.match(line):
        return None

    fields = line.split()
    data = _TraceRecord()
    data.device_numbers = fields[0]
    data.cpu_id = int(fields[1])
    data.sequence_number = int(fields[2])
    data.ts = float(fields[3])
    data.process_id = int(fields[4])
    data.action = fields[5]
    data.rwdbs = fields[6]
    # Everything after the fixed columns is kept verbatim as a token list.
    data.output = fields[7:]
    return data

def compile_traces(session, num_trials):
    """Run blkparse over every blktrace capture and store its records.

    For each combination of filesystem, host, run type and trial index
    below ``num_trials``, the file
    ``blktrace/<fs>/<host>.<fs>.<run_type>.<trial>`` is passed to
    ``blkparse``.  Every output line that ``parse_line`` recognises is
    added to ``session`` as a ``Trace``; the session is committed once
    per file.

    A line that fails to parse or store is reported on stdout as
    ``<filename>:<zero-based line index>: <line> (<error>)`` and skipped,
    so one bad record does not abort the import.

    Args:
        session: database session providing ``add`` and ``commit``.
        num_trials: number of trials per combination; indices run from 0.
    """
    from itertools import product

    combinations = product(filesystems, hosts, run_types, range(num_trials))
    for fs, host, run_type, trial in combinations:
        filename = 'blktrace/%s/%s.%s.%s.%d' % (fs, host, fs, run_type, trial)
        stderr.write('parsing ' + filename + '\n')
        # enumerate() keeps the reported index tied to the real position
        # in the blkparse output, including lines that failed earlier.
        for line_index, line in enumerate(sh.blkparse(filename, _iter=True)):
            try:
                datum = parse_line(line)
                if datum:
                    # NOTE(review): host is part of the file name but is
                    # not passed to Trace -- confirm this is intended.
                    session.add(Trace(datum, fs, trial, run_type))
            except Exception as exc:
                # Deliberately broad: the import is best-effort per line,
                # but the cause is reported rather than discarded.
                print('%s:%d: %s (%s: %s)' % (
                    filename, line_index, line.rstrip(),
                    type(exc).__name__, exc))
        session.commit()

_SLOTS_MILLIS_MAPS_RE = re.compile(r'SLOTS_MILLIS_MAPS=(\d+)')
_SLOTS_MILLIS_REDUCES_RE = re.compile(r'SLOTS_MILLIS_REDUCES=(\d+)')
_ELAPSED_RE = re.compile(r'(\d+):(\d+)\.(\d+)elapsed')
_HEAP_USAGE_RE = re.compile(r'Total committed heap usage \(bytes\)=(\d+)')


def _parse_trial_log(lines):
    """Extract the metrics of one trial from the lines of its run log.

    Each line is checked against the patterns in a fixed order and at most
    one pattern is applied per line.  When a metric appears more than
    once, the last occurrence wins.

    Args:
        lines: iterable of text lines.

    Returns:
        A tuple ``(slots_millis_maps, slots_millis_reduces, elapsed_time,
        heap_usage)``.  Each entry is an int, or ``None`` when the log
        never mentions it.  ``elapsed_time`` is in milliseconds.
    """
    slots_millis_maps = slots_millis_reduces = None
    elapsed_time = heap_usage = None
    for line in lines:
        mo = _SLOTS_MILLIS_MAPS_RE.search(line)
        if mo:
            slots_millis_maps = int(mo.group(1))
            continue
        mo = _SLOTS_MILLIS_REDUCES_RE.search(line)
        if mo:
            slots_millis_reduces = int(mo.group(1))
            continue
        mo = _ELAPSED_RE.search(line)
        if mo:
            # The three groups are read as minutes, seconds and hundredths
            # of a second (presumably time(1) output -- confirm).
            minutes, seconds, centis = (int(part) for part in mo.groups())
            # One minute is 60 * 1000 ms; an extra factor of 60 would
            # treat the minutes as hours.
            elapsed_time = minutes * 60 * 1000 + seconds * 1000 + centis * 10
            continue
        mo = _HEAP_USAGE_RE.search(line)
        if mo:
            heap_usage = int(mo.group(1))
    return slots_millis_maps, slots_millis_reduces, elapsed_time, heap_usage


def agg_runs(session, num_trials):
    """Load the per-trial run logs into the database as ``Trial`` rows.

    For each filesystem, run type and trial index below ``num_trials``
    the log ``<run_type>/<fs>.<trial>`` is read, unless that combination
    is listed in ``blacklist``, in which case it is reported and skipped.
    One ``Trial`` is added per log and printed to stdout; the session is
    committed after each run type.

    Args:
        session: database session providing ``add`` and ``commit``.
        num_trials: number of trials per combination; indices run from 0.
    """
    for fs in filesystems:
        for run_type in run_types:
            for trial_num in range(num_trials):
                filename = "%s/%s.%d" % (run_type, fs, trial_num)
                if (fs, run_type, trial_num) in blacklist:
                    print('skipping ' + filename)
                    continue
                with open(filename, 'r') as datafile:
                    (slots_millis_maps, slots_millis_reduces,
                     elapsed_time, heap_usage) = _parse_trial_log(datafile)
                trial = Trial(fs, slots_millis_maps, slots_millis_reduces,
                              elapsed_time, heap_usage, run_type)
                print('%s: %s' % (filename, trial))
                session.add(trial)
            session.commit()

def main():
    """Command-line entry point: ``<script> DB_PATH NUM_TRIALS``.

    ``DB_PATH`` is appended to ``sqlite:///`` to form the database name
    passed to ``initialize`` and ``get_session``.  ``NUM_TRIALS`` is the
    number of trials handed to ``agg_runs`` and ``compile_traces``, which
    run in that order on the same session.

    Exits with an error message on stderr when an argument is missing or
    ``NUM_TRIALS`` is not an integer.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: %s DB_PATH NUM_TRIALS' % sys.argv[0])

    dbname = 'sqlite:///' + sys.argv[1]
    try:
        num_trials = int(sys.argv[2])
    except ValueError:
        sys.exit('NUM_TRIALS must be an integer, got %r' % sys.argv[2])

    initialize(dbname)
    session = get_session(dbname)
    agg_runs(session, num_trials)
    compile_traces(session, num_trials)


if __name__ == '__main__':
    main()
