#!/usr/bin/env python
"""
Tagging golden data in terminal.

The script randomly samples candidate pairs from the blocked data and shows
each pair in color-coded form for tagging, then waits for user input (y/n).
The default answer is no.

Usage: golden.py <blocked-cand-pairs[.csv]> <start-cand-id> <end-cand-id>

"""

import sys
import csv
import numpy as np
import pickle
import logging
from blessings import Terminal

def csvquote(value):
    """CSV quote a row value and return it as text.

    Values containing a comma, a newline or a double quote are wrapped in
    double quotes; embedded double quotes are escaped by doubling them.
    Carriage returns are replaced by a single space.

    Byte strings are decoded as UTF-8. Text (``unicode``) values are used
    as-is, so non-ASCII text no longer fails on an implicit ASCII encode.
    Any other value is first rendered with ``'{0}'.format(value)``.

    Args:
        value: The cell value (byte string, text, or any formattable object).

    Returns:
        The quoted/escaped cell as a text (``unicode``) string.

    Raises:
        UnicodeDecodeError: If a byte string is not valid UTF-8.

    >>> csvquote('abc')
    u'abc'
    >>> csvquote('a, b, c')
    u'"a, b, c"'
    >>> csvquote('a "b" c')
    u'"a ""b"" c"'
    >>> csvquote('a\\nb')  # Escape for doctest
    u'"a\\nb"'
    >>> csvquote('a, \\n"b", c')  # Escape for doctest
    u'"a, \\n""b"", c"'
    >>> csvquote(1)
    u'1'
    >>> csvquote(u'caf\\xe9, bar')  # Escape for doctest
    u'"caf\\xe9, bar"'
    """
    try:
        text_type = unicode
    except NameError:  # no ``unicode`` builtin: ``str`` is the text type
        text_type = str

    if isinstance(value, bytes):
        value = value.decode('utf-8')
    elif not isinstance(value, text_type):
        value = '{0}'.format(value)
        # On Python 2 the formatted result is a byte string.
        if isinstance(value, bytes):
            value = value.decode('utf-8')

    value = value.replace(u'\r', u' ')
    needs_quotes = False
    if u'"' in value:
        value = value.replace(u'"', u'""')
        needs_quotes = True
    if u',' in value or u'\n' in value:
        needs_quotes = True
    if needs_quotes:
        return u'"' + value + u'"'
    return value

class CsvDataFile(object):
    """Load a CSV data file with a header row followed by data rows.

    Attributes are populated by the constructor:
        header: dict mapping each column name to its 0-based column index.
        rows: list of data rows, each a list of cell values as produced by
            ``csv.reader``.
    """

    def __init__(self, path):
        """Constructor.

        The first non-empty row read from the file is taken as the header;
        every row after it is stored as data. A file without any header row
        (for example an empty file) yields an empty ``header`` and no
        ``rows`` instead of failing.

        Args:
            path: Path to the CSV file.
        """
        self.header = {}
        self.rows = []
        header = None
        with open(path, 'rb') as csvfile:
            for row in csv.reader(csvfile):
                if not header:
                    header = row
                else:
                    self.rows.append(row)
        # ``header`` is still None when the file had no rows at all.
        for index, col in enumerate(header or []):
            self.header[col] = index

def write_header(header, golden='golden.csv'):
    """Append the header line to the golden data file.

    The column names are laid out by their index, CSV-quoted with
    ``csvquote`` (the same quoting used for data rows) and followed by an
    extra ``label:INTEGER`` column. Index positions that no name maps to are
    written as empty cells so the columns stay aligned with the data rows.

    Args:
        header: dict mapping column name to 0-based column index.
        golden: Path of the golden data file; it is opened in append mode.
    """
    # Size by the largest index rather than len(header): the two differ when
    # several columns share a name and therefore collapse into one dict key.
    width = max(header.values()) + 1 if header else 0
    columns = [''] * width
    for name, index in header.items():
        columns[index] = name
    with open(golden, 'ab') as log:
        line = u','.join(map(csvquote, columns + ['label:INTEGER'])) + u'\n'
        log.write(line.encode('utf-8'))

def write_golden_pair(row, answer, golden = 'golden.csv'):
    """Append one tagged candidate pair to the golden data file.

    Args:
        row: List of cell values of the candidate pair.
        answer: The label given to the pair; written as the last column.
        golden: Path of the golden data file; it is opened in append mode.
    """
    with open(golden, 'ab') as out:
        cells = [csvquote(cell) for cell in row + [answer]]
        record = u','.join(cells) + u'\n'
        out.write(record.encode('utf-8'))

def show_cand_pair(header, row, current = None):
    """Show a candidate pair to the user, read the answer and record it.

    The screen is cleared and the name, year, actors, directors and store
    URL of both sides are printed, Google Play values in blue and iTunes
    values in red. The user is then prompted with ``Match? y/[n]: ``. The
    answer ``y`` (case-insensitive, surrounding whitespace ignored) is
    recorded as label 1; anything else, including an empty answer, as 0. The
    row and its label are written with ``write_golden_pair``.

    Args:
        header: dict mapping column name to the column's index in ``row``.
            Must contain the ``google-play.*`` and ``itunes.*`` columns
            ``name:TEXT``, ``year:INTEGER``, ``actors:TEXT``,
            ``directors:TEXT`` and ``id:TEXT``.
        row: List of cell values of the candidate pair.
        current: Optional running number of the pair, shown in brackets when
            truthy.

    Raises:
        KeyError: If ``header`` lacks one of the required columns.
    """
    t = Terminal()

    def cell(column):
        # Look up a cell of ``row`` by its column name.
        return row[header[column]]

    print('{t.clear}{t.black}\n'.format(t=t))
    print('{t.blue}{t.bold}Google Play'.format(t=t))
    print('{t.red}{t.bold}iTunes'.format(t=t))
    print('')
    if current:
        print('{t.bold}{t.black} [{current}]'.format(t=t, current=current))
    print('{t.bold}{t.black} NAME  {t.normal}{t.blue}{name}'.format(
        t=t, name=cell('google-play.name:TEXT')))
    print('       {t.normal}{t.red}{name}'.format(
        t=t, name=cell('itunes.name:TEXT')))
    print('{t.bold}{t.black}YEAR  {t.normal}{t.blue}{year0} {t.red}{year1}'.format(
        t=t, year0=cell('google-play.year:INTEGER'),
        year1=cell('itunes.year:INTEGER')))
    print('{t.bold}{t.black}ACTORS {t.normal}{t.blue}{name}'.format(
        t=t, name=cell('google-play.actors:TEXT')))
    print('       {t.normal}{t.red}{name}'.format(
        t=t, name=cell('itunes.actors:TEXT')))
    print('{t.bold}{t.black}DIRECTORS {t.normal}{t.blue}{name}'.format(
        t=t, name=cell('google-play.directors:TEXT')))
    print('          {t.normal}{t.red}{name}'.format(
        t=t, name=cell('itunes.directors:TEXT')))
    print('{t.bold}{t.black}URL {t.normal}{t.blue}{name}'.format(
        t=t, name='https://play.google.com/store/movies/details?id=' +
        cell('google-play.id:TEXT')))
    print('    {t.normal}{t.red}{name}'.format(
        t=t, name='https://itunes.apple.com/us/movie/x/id' +
        cell('itunes.id:TEXT')))
    print('{t.normal}'.format(t=t))

    # Normalize the reply so that 'Y' or 'y ' is not silently stored as a
    # non-match; every other reply keeps the default of "no".
    reply = raw_input('Match? y/[n]: ').strip().lower()
    answer = 1 if reply == 'y' else 0
    write_golden_pair(row, answer)

def tag_golden(data, start, end, seen = None):
    """Tag the candidate pairs in the [start, end) range in random order.

    The header line is written once with ``write_header``; then every
    selected row is shown with ``show_cand_pair``, numbered from 1.

    Args:
        data: Loaded data providing ``header`` (column name to index) and
            ``rows`` (list of rows, indexed by row id).
        start: First row id to include.
        end: Row id at which to stop (exclusive).
        seen: Optional iterable of row ids to leave out. Any iterable is
            accepted, not only a set.
    """
    logging.info('Sampling randomly ...')
    candidates = set(xrange(start, end))
    if seen:
        # difference_update takes any iterable, unlike the ``-`` operator.
        candidates.difference_update(seen)
    order = np.array(list(candidates))
    np.random.shuffle(order)
    logging.info('Starting to show candidate pairs randomly ...')
    write_header(data.header)
    for current, rowid in enumerate(order, 1):
        show_cand_pair(data.header, data.rows[rowid], current)

def main():
    """Main function.

    Reads the data file path and the [start, end) row id range from
    ``sys.argv``, loads the file and starts the tagging session.

    Exits with the usage text when fewer than three arguments are given.

    Raises:
        ValueError: If start/end are not integers, if the file has no data
            rows, or if the first column of the last row does not equal the
            number of data rows minus one.
    """
    logging.basicConfig(level=logging.DEBUG)
    if len(sys.argv) < 4:
        # A string argument makes sys.exit print it to stderr and exit with 1.
        sys.exit(__doc__ or 'Usage: golden.py <blocked-cand-pairs[.csv]> '
                 '<start-cand-id> <end-cand-id>')
    start = int(sys.argv[2])
    end = int(sys.argv[3])
    logging.info('Loading data ...')
    data = CsvDataFile(sys.argv[1])
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    # NOTE(review): the first column is presumably the candidate id, expected
    # to match the row's position -- confirm against the blocking output.
    if not data.rows or len(data.rows) != int(data.rows[-1][0]) + 1:
        raise ValueError(
            'first column of the last row must equal the number of data '
            'rows minus one (got {0} data rows)'.format(len(data.rows)))
    tag_golden(data, start, end)

# Start the tagging session only when run as a script, not when imported.
if __name__ == '__main__':
    main()