#!/usr/bin/env python

import sys, sh, subprocess, os, signal

class Server(object):
    """One benchmark node, driven over SSH as root.

    Attributes:
        hostname: host name used to build the ``root@<hostname>`` SSH target.
        device: block device passed to ``blktrace -d``.
        volume: volume path stored for callers; not used inside this class.
        ssh: ``sh`` command pre-bound to ``ssh root@<hostname>``.
        open_files: list initialised empty; not used inside this class.
        process: the local ``ssh ... blktrace`` subprocess, or None when no
            trace has been started (or the last one has been reaped).
    """

    def __init__(self, hostname, device, volume):
        self.hostname = hostname
        self.device = device
        self.volume = volume
        self.ssh = sh.ssh.bake('root@' + hostname)
        self.open_files = []
        # Defined up front so end_blktrace() is safe to call on a server
        # whose trace was never started (e.g. from a signal handler).
        self.process = None

    def doSsh(self, cmd, _out=None, _bg=False):
        """Run ``cmd`` through the baked ssh command and return its result.

        ``cmd`` is split on whitespace and the pieces are handed to ssh as
        separate arguments; ``_out`` and ``_bg`` are forwarded to ``sh``
        unchanged.
        """
        return self.ssh(cmd.split(), _out=_out, _bg=_bg)

    def start_blktrace(self, filename):
        """Start blktrace for ``self.device`` in a background ssh subprocess.

        ``sync`` and ``echo 3 > /proc/sys/vm/drop_caches`` are sent to the
        server first (presumably so each trace starts with cold caches).
        ``filename`` is passed to ``blktrace -o``. The subprocess handle is
        kept in ``self.process`` for end_blktrace().
        """
        self.doSsh('sync')
        self.doSsh('echo 3 > /proc/sys/vm/drop_caches')
        self.process = subprocess.Popen(
                ('ssh root@%s blktrace -b 2048 -n 8 -d %s -o %s' % (self.hostname, self.device, filename)).split()
            )

    def end_blktrace(self):
        """Stop blktrace on the server and reap the local ssh subprocess.

        A failing ``killall blktrace`` is reported as a warning only. If no
        trace subprocess exists, the wait step is skipped.
        """
        self.doSsh('sync')
        try:
            self.doSsh('killall blktrace')
        except Exception:
            # Best effort; Exception (not a bare except) so Ctrl-C and
            # SystemExit still propagate.
            print('WARNING: failed to killall blktrace')
        if self.process is not None:
            self.process.wait()
            self.process = None

# Cluster nodes under test, as Server(hostname, device, volume).
# `device` is what blktrace records from; `volume` is what main() reformats
# and what mount()/umount() operate on between filesystem runs.
servers = [
        Server('baal', '/dev/sda', '/dev/sda2'),
        Server('lilith', '/dev/sda', '/dev/ubuntu/hadoop'),
        Server('abaddon', '/dev/sda', '/dev/ubuntu/hadoop'),
]

# Filesystems to benchmark, in run order, as (name, format command) pairs.
# main() appends each server's volume to the format command and runs it over
# SSH; the name labels trace files and result paths.
filesystems = [
        # zfs entry is disabled.
        #('zfs', 'zpool create hdpool -m /mnt/hadoop'),
        ('reiserfs', 'mkfs.reiserfs -q'),
        ('ext4', 'mkfs.ext4'),
        ('ext2', 'mkfs.ext2'),
        ('xfs', 'mkfs.xfs -f'),
        ('btrfs', 'mkfs.btrfs'),
]

def run_trial(fs, trial_num, run_type):
    """Run one TestDFSIO pass while blktrace records on every server.

    Args:
        fs: filesystem name; used in trace file names and result paths.
        trial_num: integer trial index; used in trace file names and result
            paths.
        run_type: 'write' or 'read'; passed to TestDFSIO as ``-<run_type>``.
            A 'write' run first deletes /benchmarks from HDFS, retrying
            every 5 seconds for as long as the delete exits with status 255.

    The timed benchmark's stderr goes to data/<run_type>/<fs>.<trial_num>,
    and each server's trace files are copied into data/blktrace/<fs>/.
    A benchmark exit status of 255 is reported as a warning; any other
    failure propagates, but only after tracing has been stopped.
    """
    print("%s: trial %d, %s" % (fs, trial_num, run_type))
    trace_filename = fs + '.' + run_type + '.' + str(trial_num)

    if run_type == 'write':
        while True:
            try:
                sh.sudo('hadoop fs -rmr -skipTrash /benchmarks'.split())
                break
            except sh.ErrorReturnCode_255:
                print("failed to delete benchmark data; retrying in 5 seconds...")
                sh.sleep(5)

    for server in servers:
        server.start_blktrace(server.hostname + '.' + trace_filename)

    try:
        sh.sleep(1)
        sh.time(
                ('sudo hadoop jar /usr/share/hadoop/hadoop-test-1.0.4.jar TestDFSIO -%s -fileSize 100 -nrFiles 100' % run_type).split(),
                _err = 'data/%s/%s.%d' % (run_type, fs, trial_num)
        )
    except sh.ErrorReturnCode_255:
        print("WARNING: test failed")
    finally:
        # Always stop the tracers, even on an unexpected error or Ctrl-C,
        # so no blktrace/ssh processes are left running. All tracers are
        # stopped before any copy starts.
        for server in servers:
            server.end_blktrace()

    for server in servers:
        sh.scp(('root@%s:%s.* %s' % (server.hostname, server.hostname + '.' + trace_filename, 'data/blktrace/%s/' % fs)).split())

def make_directories():
    """Create the local output directories the benchmark writes into.

    Creates data/blktrace/<fs> for every configured filesystem (destination
    of the copied trace files) plus data/write and data/read (where
    run_trial() redirects the benchmark's stderr). Directories that already
    exist are left alone; a creation failure is reported as a warning
    rather than raised.
    """
    paths = ['data/blktrace/' + fs for fs, _ in filesystems]
    # run_trial() writes to data/<run_type>/..., and main() runs these two
    # run types; without the directories the redirect cannot be opened.
    paths += ['data/write', 'data/read']

    for path in paths:
        if os.path.isdir(path):
            continue
        try:
            os.makedirs(path)
        except OSError as err:
            print('WARNING: could not create %s: %s' % (path, err))

def sigterm_handler(signum, frame):
    """SIGTERM handler: stop blktrace on every server, then exit with status 1.

    Args:
        signum: signal number (unused).
        frame: interrupted stack frame (unused).
    """
    for server in servers:
        try:
            server.end_blktrace()
        except Exception as err:
            # Keep going so one server's failure (for instance a server
            # whose trace was never started) does not leave tracers
            # running on the remaining servers.
            print('WARNING: could not stop blktrace on %s: %s' % (server.hostname, err))
    # The os module has no exit(); the previous os.exit(1) raised
    # AttributeError instead of terminating the script.
    sys.exit(1)

def stop_all():
    """Run stop-mapred.sh and then stop-dfs.sh through sudo (best effort).

    Each script is attempted independently, so a failure of the first does
    not skip the second. Failures are printed as warnings and never raised.
    """
    print('stopping hadoop...')
    for script in ('stop-mapred.sh', 'stop-dfs.sh'):
        try:
            sh.sudo(script)
        except Exception as err:
            print('WARNING: %s failed: %s' % (script, err))

def start_all():
    """Run start-mapred.sh and then start-dfs.sh through sudo (best effort).

    Each script is attempted independently, so a failure of the first does
    not skip the second. Failures are printed as warnings and never raised.
    """
    print('starting hadoop...')
    for script in ('start-mapred.sh', 'start-dfs.sh'):
        try:
            sh.sudo(script)
        except Exception as err:
            print('WARNING: %s failed: %s' % (script, err))

def umount():
    """Unmount each server's volume over SSH (best effort).

    A failure on one server is reported as a warning and does not stop the
    remaining servers from being processed.
    """
    print('unmounting filesystems...')
    for server in servers:
        try:
            server.doSsh('umount ' + server.volume)
        except Exception as err:
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # are not swallowed here.
            print('WARNING: umount failed on %s: %s' % (server.hostname, err))

def mount():
    """Mount every server's volume at /mnt/hadoop over SSH."""
    print('mounting filesystems...')
    for node in servers:
        mount_cmd = ' '.join(['mount', node.volume, '/mnt/hadoop'])
        node.doSsh(mount_cmd)

def format_namenode():
    """Run ``hadoop namenode -format`` through sudo."""
    print('formatting namenode...')
    argv = ['hadoop', 'namenode', '-format']
    sh.sudo(argv)

def hadoop_mkdir():
    """Run ``hadoop fs -mkdir /benchmarks`` through sudo."""
    print('making hadoop directory...')
    argv = ['hadoop', 'fs', '-mkdir', '/benchmarks']
    sh.sudo(argv)

def main():
    """Benchmark every configured filesystem over a range of trials.

    Command line: START_TRIAL END_TRIAL (integers). Trials run over
    range(START_TRIAL, END_TRIAL), so END_TRIAL itself is excluded.
    Missing or non-integer arguments print a usage line to stderr and exit
    with status 2 before anything else is touched.

    For each filesystem: stop Hadoop, unmount the volumes, run the format
    command on each server's volume, remount, format the namenode, start
    Hadoop, create /benchmarks, then run a 'write' and a 'read' pass per
    trial.
    """
    # Validate arguments first: the loop below reformats volumes, so fail
    # early and clearly on a bad invocation.
    try:
        start_trial = int(sys.argv[1])
        end_trial = int(sys.argv[2])
    except (IndexError, ValueError):
        sys.stderr.write('usage: %s START_TRIAL END_TRIAL\n' % sys.argv[0])
        sys.exit(2)

    signal.signal(signal.SIGTERM, sigterm_handler)
    make_directories()

    for fs, cmd in filesystems:
        stop_all()
        umount()
        for server in servers:
            server.doSsh(cmd + ' ' + server.volume)
        mount()
        format_namenode()
        start_all()
        hadoop_mkdir()
        for trial in range(start_trial, end_trial):
            run_trial(fs, trial, 'write')
            run_trial(fs, trial, 'read')

# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
