#!/usr/bin/python
#
#  Copy PSSM file, adding pseudocount
#
#	usage:  pssm2pssm.py [pseudocount] < INPUT > OUTPUT
#
#		'pseudocount' will be added to each entry, then each set of four [ACGT] will be normalized.
#

import sys,os,string;

# Nucleotide order of the four values stored for each PSSM position.
ALPHABET = 'ACGT'

# Field separator and line terminator of the PSSM text format.
TAB = '\t'
ENDL = '\n'

# One-line command-line synopsis, built from the name the script was run as.
USAGE = "".join(("usage:  ", sys.argv[0], " [pseudocount = 0.0] < pssm-file"))


def add_pseudocount(input, output, pseudocount):
	"""Copy PSSM records from input to output, adding a pseudocount.

	Each non-blank input line is one whitespace-separated record:

	    FIRST SECOND a1 c1 g1 t1 a2 c2 g2 t2 ...

	The first two fields are copied through unchanged.  The length of
	FIRST gives the number of positions in the record (presumably it is
	the motif's consensus string -- confirm against the data files), and
	each position contributes four consecutive numeric fields in A, C,
	G, T order.  'pseudocount' is added to every one of those values and
	each group of four is then rescaled so that it sums to 1.

	Records are written tab-separated, one per line.  Lines holding only
	whitespace are skipped.

	Arguments:
	    input        object with a readline() method yielding text lines
	    output       object with a write() method accepting text
	    pseudocount  number added to every A/C/G/T value before normalizing

	Raises:
	    IndexError         a record has fewer than 2 + 4*len(FIRST) fields
	    ValueError         an A/C/G/T field is not a valid float
	    ZeroDivisionError  the four adjusted values of a position sum to 0
	"""
	# iter(callable, sentinel) calls readline() until it returns '' (EOF),
	# so only a readline() method is required of 'input'.
	for line in iter(input.readline, ''):
		fields = line.split()
		if not fields:
			# Blank or whitespace-only line (including a bare '\r\n').
			continue

		motif, info = fields[0], fields[1]
		record = [motif, info]
		for pos in range(len(motif)):
			base = 2 + 4 * pos
			# Index explicitly (rather than slice) so a truncated record
			# raises IndexError instead of being silently shortened.
			a, c, g, t = [float(fields[base + k]) + pseudocount for k in range(4)]
			total = a + c + g + t
			if total == 0:
				raise ZeroDivisionError(
					"cannot normalize position %d of %r: values sum to zero"
					% (pos + 1, motif))
			record.extend(str(value / total) for value in (a, c, g, t))

		# Written in one piece so a bad record never leaves a partial line.
		output.write("\t".join(record) + "\n")

def main():
	"""Run the filter: read a PSSM file on stdin, write the result to stdout.

	The optional first command-line argument is the pseudocount; it
	defaults to 0.0.  If it cannot be parsed as a number, the usage
	message is printed to stderr and the process exits with a non-zero
	status instead of dying with a traceback.
	"""
	pseudocount = 0.0
	if len(sys.argv) > 1:
		try:
			pseudocount = float(sys.argv[1])
		except ValueError:
			# sys.exit() with a string prints it to stderr and exits with 1.
			sys.exit(USAGE)

	add_pseudocount(sys.stdin, sys.stdout, pseudocount)

# Run only when executed as a script, so that importing this module does
# not start reading from stdin.
if __name__ == "__main__":
	main()

