/*************************************************************************/
/*                                                                       */
/* Copyright 2003, Living Circuit LLC.                                   */
/*                                                                       */
/* For academic use with the following condition:                        */
/*                                                                       */
/* Material is provided "as is" and "as available", without any          */
/* warranty, and if you use it you do it at your own risk, with no       */
/* support agreement expressed or implied.                               */
/*                                                                       */
/* Any other use of this software is contingent on having a valid and    */
/* signed license agreement with Living Circuit (livingCircuit.com)      */
/*                                                                       */
/*  All other rights reserved.                                           */
/*                                                                       */
/*************************************************************************/
/********************************************/
/********************************************/
/**                                        **/
/**                                        **/
/**     "pop_gen.c" reads does a long-     **/
/**     term population simulation based   **/
/**     on relative fitnesses and          **/
/**     mutation probabilities.            **/
/**                                        **/
/**     Written by Michael Molla           **/
/**     September, 2003                    **/
/**                                        **/
/**                                        **/
/**                                        **/
/*******************************************/
/*******************************************/


#include<stdio.h>
#include<math.h>
#include<stdlib.h>
//#include<syscalls.h>
#include "synth_pop.h"

#define VERSION "050605"

double double_rand()
{
   double to_return;

   to_return = ((double)RAND_MAX * rand() + rand())/((double)RAND_MAX * (double)RAND_MAX);

   return to_return;
}


// This is the function that decodes the fittness file
double fitness_codes(char code, double s, float t)
{
   double to_return = 1.0;

   // If you want to define a fittness function, add it here as another case
   switch(code)
   {
      case(0):
	 to_return = 0.0;
	 break;
      case(1):
	 to_return = 1.0;
	 break;
      case(2):
	 to_return = 1.0 + ((t-s)/2.0);
	 break;
      case(3):
	 to_return = 1.0 + ((3.0 * t)/4.0);
	 break;
      case(4):
	 to_return = 1.0 + t;
	 break;
      case(5):
	 to_return = 1.0 + t/2.0;
	 break;
      case(6):
	 to_return = 1.0 + t/4.0;
	 break;
      case(7):
	 to_return = 1.0 - s;
	 break;
      case(8):
	 to_return = 1.0 - s/2;
	 break;
      default:
	printf("Unknown code [%d].\n",code);
   }

   return to_return;
}


main(int argc, char **argv)
{
   FILE *fitness_functions;            // <- to input the fitnesses
   char fitness_fname[MAX_FNAME];      // <- to input the fitnesses
   FILE *output_file;                  // <- to output the results
   char output_fname[MAX_FNAME];       // <- to output the results
   FILE *param_file;                   // <- to output the params
   char param_fname[MAX_FNAME];        // <- to output the params
   double fitnesses[NUM_HAPLOID_GENOTYPES];    // <- remember the fitnesses
   char input_buffer[MAX_LINE];        // <- for reading the fitness file
   char *cur_token;                    // <- the file is tokenized
   char cur_col_num, fitness_col_num;  // <- which in the fitness file
   float s,k;                          // some utlilty vars
   long x, i, j, num_organisms;        //
   double cur_rand, cur_rand_for_one, cur_rand_for_two; // For picking random numbers
   int rand_seed;                      // 
   int m, n, array_position, cur_int; // more utlity vars
   int to_kick;      // <- to kick or not to kick the Pseudo-Random Number Generator (PRNG)
   int cur_genotype, parent_one, parent_two; // simple genotypes are hald in ints
   char a, b;                    // For holding a "genome"
   double mutations[2][3];             // For holding the mutation probs

   // For holding the gernations while we mutate them
   int prev_generation[NUM_HAPLOID_GENOTYPES], cur_generation[NUM_HAPLOID_GENOTYPES];
   float cur_fittness;                 // <- current fittness
   long generation_size;

   // Checkpointing variables
   int new_n = -1, old_n;
   char tmp_fname[MAX_FNAME];
   FILE *tmp_file;
   int result, num_tokens_read;
   char cur_buffer[MAX_LINE];          // <- for reading the progress file
   char tmp_buffer[MAX_LINE];          // <- for reading the progress file
   char prev_buffer[MAX_LINE];         // <- for reading the progress file
 
   // In case we crash, make sure we even got started
   printf("Called [%s] version %s\nrand_max=[%ld].\n",argv[0],VERSION,RAND_MAX);


   if((argc==2) || (argc==3)) // <- no longer allow a new "n" on the command line
   {
      // We need to find m, s, k, n, a12, a13, a23, b12, b13, b23 and the fitnesses

      strcpy(output_fname, argv[1]);
      if(!(output_file = fopen(output_fname, "r+")))
      {
         printf("Could not open output file [%s] to read and modify.\n", output_fname);
         exit(1);
      }
      strcpy(param_fname, output_fname);
      strcat(param_fname, PARAM_FILE_SUFFIX);
      if(!(param_file = fopen(param_fname, "r")))
      {
         printf("Could not open param file [%s] to read.\n", param_fname);
         exit(1);
      }

      if(argc>2)
      {
         new_n = atoi(argv[2]);
         printf("New n is [%d].\n",new_n);
      }

      // Read each line, but remember the previous one
      // in case the last one is not complete
      result = (int)fgets(prev_buffer,MAX_LINE,output_file);
      if(!result)
      {
         printf("You have to reenter the parameters.  We can't checkpoint because we've failed to read even one line of [%s].\n", output_fname);
         exit(1);
      }
      result = (int)fgets(cur_buffer,MAX_LINE,output_file);
      while(result)
      {
         strcpy(tmp_buffer, cur_buffer);
         result = (int)fgets(cur_buffer,MAX_LINE,output_file);
         if(result)
         {
            strcpy(prev_buffer, tmp_buffer);
         }
      }

      m = atoi((char *)strtok(cur_buffer, "\t"));
      printf("m (where we left off) is [%d].\n", m);fflush(NULL);
      for(num_tokens_read=0;num_tokens_read<NUM_HAPLOID_GENOTYPES;num_tokens_read++)
      {
         cur_token = (char *)strtok(NULL, "\t");
         if(cur_token)
         {
            // "cur" and "prev" here are for later
 	    cur_generation[num_tokens_read] = prev_generation[num_tokens_read] = atoi(cur_token);
         }
         else
         {
            if(cur_buffer[0]!=-1)
            {
               printf("The last line of [%s] is not complete (only %d entries).  Going to the second to last line.\n",output_fname,num_tokens_read);
               // I am changing the loop variable within the for loop.  Don't try this at home.
               // (to "-1" so that when incremented above it's right on track at zero)
               num_tokens_read = -1; 
               // The is a signal that we've moved back to "prev_buffer".
               cur_buffer[0] = -1;
               // I do think this is a nifty way to take advantage of the "NULL" option in strtok.
               m = atoi((char *)strtok(prev_buffer, "\t"));
            }
            else
            {
               printf("You have to reenter the parameters.  We can't checkpoint because neither of the last two lines of [%s] are complete.\n", output_fname);
               exit(1);
            }
         }
      }

      printf("m (where we left off) is [%d].\n", m);fflush(NULL);


      // Read the param file
      result = fscanf(param_file, "s = [%f], k = [%f], n = [%d] gerneration_size=[%ld].\n",&s,&k,&n,&generation_size);

      if(result != 4)
      {
         printf("You have to reenter the parameters.  Line 1 of [%s] is not of the right format.\n", param_fname); 
         exit(1);
      }

      if((new_n != -1) && (new_n != n))
      {
         printf("Switching n from [%d] (param file version) to [%d] (command line version). (Upon completion, look in \"%s%s\" for a parameter set reflecting this change)\n", n, new_n, param_fname, TMP_FILE_SUFFIX);
         n = new_n;
      }
      

      if(n <= m)
      {
         printf("%d (n, according to the param file [%s]) <= %d (where we left off according to the output file [%s]). Nothing to do.\n", n, param_fname, m, output_fname);
      }
      else
      {
         // Set everything to -1 and check afterword to make sure we set everything
         for(i=0;i<NUM_ALLILES;i++)
         {
	    mutations[A_MUTATION_RATE][i] = -1;
	    mutations[B_MUTATION_RATE][i] = -1;
         }
         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    fitnesses[i] = -1;
         }

         printf("Working on generations [%d] through [%d].\n",m,n);
         while(fgets(input_buffer,MAX_LINE,param_file))
         {
            cur_token = (char *)strtok(input_buffer, "\t ");
            while(cur_token)
            {
                //printf("[%s]",cur_token);
                if(cur_token[0]=='a')
                {
                   i = atoi(((char *)cur_token)+sizeof(char));
                   //printf("{%c}{%d}",'A',i);
                   cur_token = (char *)strtok(NULL, "\t ");
                   cur_token = (char *)strtok(NULL, "\t ");
                   mutations[A_MUTATION_RATE][i] = atof(cur_token);
                   //printf("%s{%.10f}\n",cur_token,mutations[A_MUTATION_RATE][i]);
                }
                else if(cur_token[0]=='b')
                {
                   i = atoi(((char *)cur_token)+sizeof(char));
                   //printf("{%c}{%d}",'B',i);
                   cur_token = (char *)strtok(NULL, "\t ");
                   cur_token = (char *)strtok(NULL, "\t ");
                   mutations[B_MUTATION_RATE][i] = atof(cur_token);
                   //printf("%s{%.10f}\n",cur_token,mutations[B_MUTATION_RATE][i]);
                }
                else
                {
                   i = atoi(cur_token);
                   //printf("{%d}",i);
                   cur_token = (char *)strtok(NULL, "\t ");
                   cur_token = (char *)strtok(NULL, "\t ");
                   fitnesses[i] = atof(cur_token);
                   //printf("%f = %s\n",fitnesses[i],cur_token);
                }
                cur_token = (char *)strtok(NULL, "\t ");
            }
         }


         // Check to see that none of these params is still -1.
         for(i=0;i<NUM_ALLILES;i++)
         {
	    if((mutations[A_MUTATION_RATE][i] == -1) ||
               (mutations[B_MUTATION_RATE][i] == -1))
            {
               printf("You have to reenter the parameters.  Mutation rates of [%s] are incomplete.\n", param_fname);
               exit(1); 
            }
         }
         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
            if(fitnesses[i] == -1)
            {
               printf("You have to reenter the parameters.  Fitnesses of [%s] are incomplete.\n", param_fname);
               exit(1);
            }
         }

         strcpy(tmp_fname, param_fname);
         strcat(tmp_fname, TMP_FILE_SUFFIX);
         if(tmp_file = fopen(tmp_fname,"w"))
         {
            // Print out what we got for future reference
            fprintf(tmp_file, "s = [%f], k = [%f], n = [%d] gerneration_size=[%ld].\n",s,k,n,generation_size);
            for(i=0;i<NUM_ALLILES;i++)
            {
	       fprintf(tmp_file,"a%d = %.10f\n",i,mutations[A_MUTATION_RATE][i]);
	       fprintf(tmp_file,"b%d = %.10f\n",i,mutations[B_MUTATION_RATE][i]);
            }
            // Print out the fittnesses for future references
            for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
            {
	       fprintf(tmp_file, "%d ->\t%f\n",i,fitnesses[i]);
            }
            fclose(tmp_file);
         }
         else
         {
            printf("Could not open [%s] to write.\n",tmp_fname);
         }
      }

      // The last line in the file could be chopped.
      fprintf(output_file,"\n");
      fprintf(output_file,"%d",m);fflush(NULL);
      for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
      {
         fprintf(output_file,"\t%d",cur_generation[i]);fflush(NULL);
      }
      fprintf(output_file,"\n");
      m++;

      //fclose(output_file);
      fclose(param_file);
   }
   else
   {
      // Were we called correctly?
      if(argc!=15 && argc!=14)
      {
         printf("Usage:%s <fitness fname> <fitness fname column> <s(percent)> <k(percent)> <n> <generation size> <p(a1->2)(x10^10)> <p(a2->3)(x10^10)> <p(a1->3)(x10^10)> <p(b1->2)(x10^10)> <p(b2->3)(x10^10)> <p(b1->3)(x10^10)> <output fname> [random seed]\n or   %s <output fname> [new n]\n",argv[0],argv[0]);
         exit(1);
      }


      // What's the fittness file?
      strcpy(fitness_fname, argv[1]);
      if(!(fitness_functions = fopen(fitness_fname, "r")))
      {
         printf("Could not open fitness file [%s] to read.\n", fitness_fname);
         exit(1);
      }
      strcpy(output_fname, argv[13]);
      if(!(output_file = fopen(output_fname, "w")))
      {
         printf("Could not open output file [%s] to write.\n", output_fname);
         exit(1);
      }
      strcpy(param_fname, output_fname);
      strcat(param_fname, PARAM_FILE_SUFFIX);
      if(!(param_file = fopen(param_fname, "w")))
      {
	 printf("Could not open param file [%s] to write.\n", param_fname);
         exit(1);
      }

      // Grab the rest of the params
      s = (float)(atoi(argv[3]))/100.0;
      k = (float)(atoi(argv[4]))/100.0;
      n = atoi(argv[5]);
      generation_size = atoi(argv[6]);
      mutations[A_MUTATION_RATE][ONE_TO_TWO]   = (double)(atoi(argv[7]))/10000000000.0;
      mutations[A_MUTATION_RATE][TWO_TO_THREE] = (double)(atoi(argv[8]))/10000000000.0;
      mutations[A_MUTATION_RATE][ONE_TO_THREE] = (double)(atoi(argv[9]))/10000000000.0;
      mutations[B_MUTATION_RATE][ONE_TO_TWO]   = (double)(atoi(argv[10]))/10000000000.0;
      mutations[B_MUTATION_RATE][TWO_TO_THREE] = (double)(atoi(argv[11]))/10000000000.0;
      mutations[B_MUTATION_RATE][ONE_TO_THREE] = (double)(atoi(argv[12]))/10000000000.0;
      if(argc>14)rand_seed = atoi(argv[14]);
      else       rand_seed = DEFAULT_SEED;
      // Print out what we got for future reference
      fprintf(param_file, "s = [%f], k = [%f], n = [%d] gerneration_size=[%ld].\n",s,k,n,generation_size);
      for(i=0;i<NUM_ALLILES;i++)
      {
         fprintf(param_file,"a%d = %.10f\n",i,mutations[A_MUTATION_RATE][i]);
         fprintf(param_file,"b%d = %.10f\n",i,mutations[B_MUTATION_RATE][i]);
      }

      // Read the fittness functions
      fitness_col_num = atoi(argv[2]);
      printf("fitness_col_num = [%d]\n",fitness_col_num);
      while(fgets(input_buffer,MAX_LINE,fitness_functions))
      {
        //printf("->[%s]<-\n",input_buffer);

	 if(input_buffer[0]!='#')
         {
 	    // Nothing fancy.  Tokenize the line and grab the right column
            cur_token = (char *)strtok(input_buffer, "\t");
            cur_col_num = 0;
            cur_genotype = 0;

            //printf("[%d -> [%s]]\n",cur_col_num,cur_token);

            // Figure out which row we're looking at
            while(cur_token)
            {
               cur_int = atoi(cur_token);

	       if(cur_col_num==0)
	       {
	          cur_genotype += (cur_int-1);
               }
               else if(cur_col_num==1)
	       {
	          cur_genotype += (cur_int-1)*3;
               }
               //else if(cur_col_num==2)
	       //{
	       //   cur_genotype += (cur_int-1)*9;
               //}
               //else if(cur_col_num==3)
	       //{
	       //   cur_genotype += (cur_int-1)*27;
               //}
               else if(cur_col_num==(fitness_col_num+1))
	       {
	          //printf("*%d*",cur_int);
                  //a1,a2,b1,b2
                  fitnesses[cur_genotype] =
                     fitness_codes(cur_int,s,k); 
               }

               cur_token = (char *)strtok(NULL, "\t");
               cur_col_num++;
            }
         }
      }
      fclose(fitness_functions);

      // Print out the fittnesses for future references
      for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
      {
         fprintf(param_file, "%d ->\t%f\n",i,fitnesses[i]);
      }
      fclose(param_file);

      // Initailize the 0th generation
      for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
      {
	 prev_generation[i] = 0;  
      }
      prev_generation[0] = generation_size;

      // Print out the header for the population distributions
      fprintf(output_file,"generation");

      //for(b[1]=0;b[1]<NUM_ALLILES;b[1]++)
      //{
         for(b=0;b<NUM_ALLILES;b++)
         {
           //for(a[1]=0;a[1]<NUM_ALLILES;a[1]++)
           //{
               for(a=0;a<NUM_ALLILES;a++)
               {
                  fprintf(output_file, "\t%d%d",a+1,b+1);
               }
               //}
         }
      //}
      fprintf(output_file, "\n");


      srand(rand_seed);


      printf("Doing the evolution (each dot = [%d] generations)...\n", OUTPUT_PERIOD);

      m=0;
   }

      for(/*m=0*/;(m<n)&&(prev_generation[NUM_HAPLOID_GENOTYPES-1]<generation_size);m++)
      {
        // Kick the random number generatior every once-in-a-while.
        to_kick = (m * generation_size) % RESEED_FREQUENCY;
	if(m!=0 && (to_kick < generation_size))
	{
	   rand_seed += RESEED_ADDITION;
           printf("Reseeding the PRNG to [%d].\n",rand_seed);
           srand(rand_seed);
	} 

         // Mutate
      
         // You need 2 sets of values so that you can mess around
         // with one (that changes) based on the distribution of the
         // other (which does not change).
         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    cur_generation[i] = prev_generation[i];
         }

         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    a = i%3;
            b = (i/3)%3;
	    //printf("[%d][%d][%d][%d]\n",a[0],a[1],b[0],b[1]);fflush(NULL);
	    for(j=0;j<prev_generation[i];j++)
	    {
               cur_genotype = i;
	       cur_rand = ((double)RAND_MAX * rand() + rand())/((double)RAND_MAX * (double)RAND_MAX);
               //printf("[%.10f]\t",cur_rand);
 	       switch(a)
	       {
	          case 0:
                     if(cur_rand < mutations[A_MUTATION_RATE][ONE_TO_TWO])
	             {
	                //printf(".");fflush(NULL);
                        cur_genotype+=1;
	             }
                     else if(cur_rand < (mutations[A_MUTATION_RATE][ONE_TO_TWO] +
                                         mutations[A_MUTATION_RATE][ONE_TO_THREE]))
	             {
	                //printf(";");fflush(NULL);
                        cur_genotype+=2;
	             }
                     break;
	          case 1:
                     if(cur_rand < mutations[A_MUTATION_RATE][TWO_TO_THREE])
	             { 
	                //printf(",");fflush(NULL);
                        cur_genotype+=1;
	             }
                     break;
	       }


	       cur_rand = ((double)RAND_MAX * rand() + rand())/((double)RAND_MAX * (double)RAND_MAX);

 	       switch(b)
	       {
	          case 0:
                     if(cur_rand < mutations[B_MUTATION_RATE][ONE_TO_TWO])
	             {
	                //printf("/");fflush(NULL);
                        cur_genotype+=3;
	             }
                     else if(cur_rand < (mutations[B_MUTATION_RATE][ONE_TO_TWO] +
                                         mutations[B_MUTATION_RATE][ONE_TO_THREE]))
	             {
	                //printf("|");fflush(NULL);
                        cur_genotype+=6;
	             }
                     break;
	          case 1:
                     if(cur_rand < mutations[B_MUTATION_RATE][TWO_TO_THREE])
	             {
	                //printf("\\");fflush(NULL);
                        cur_genotype+=3;
	             }
                     break;
	       }


               cur_generation[i]--;
               cur_generation[cur_genotype]++;

            }
            //printf("[%f]\n",cur_rand/(double)RAND_MAX);fflush(NULL);
         }


         //Do the Fitness Here

         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    prev_generation[i] = cur_generation[i];
         }

         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
	 {
	   //a[0] = i%3;
	   //a[1] = (i/3)%3,
	   //b[0] = (i/9)%3,
	   //b[1] = i/27;

	   cur_fittness = fitnesses[i];
           cur_generation[i] =
	      (int)((cur_fittness * 100.0) * (float)prev_generation[i]);
         }
	 //printf( "\n");


         // Mate

         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    prev_generation[i] = cur_generation[i];
            cur_generation[i] = 0;
         }

         num_organisms = 0;
         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    num_organisms +=  prev_generation[i];
	 }
         //printf("%d\n",num_organisms);

         // For as many organisms as we want in the pool
         for(i=0;i<generation_size;i++)
	 {

            // Pick an a1 allile (and a b1 allile as of 10/15/03)
            // Pick an a2 allile (and a b2 allile as of 10/15/03)
	    //cur_rand_for_one = rand()/((double)RAND_MAX);
	    //cur_rand_for_two = rand()/((double)RAND_MAX);
	    cur_rand_for_one = ((double)RAND_MAX * rand() + rand())/((double)RAND_MAX * (double)RAND_MAX);
	    cur_rand_for_two = ((double)RAND_MAX * rand() + rand())/((double)RAND_MAX * (double)RAND_MAX);
            for(j=0;(j<NUM_HAPLOID_GENOTYPES) && ((cur_rand_for_one >= 0.0) || (cur_rand_for_two >= 0.0));j++)
	    {
	       // From which genotype do we pick?
	       if((cur_rand_for_one >= 0.0) &&
                  (cur_rand_for_one < (((float)(prev_generation[j]))/((float)num_organisms))))
	       {
                  parent_one = j;
	       }

	       cur_rand_for_one -= (((float)(prev_generation[j]))/((float)num_organisms));


	       // From which genotype do we pick?
	       if((cur_rand_for_two >= 0.0) &&
                  (cur_rand_for_two < (((float)(prev_generation[j]))/((float)num_organisms))))
	       {
                  parent_two = j;
	       }
               
	       cur_rand_for_two -= (((float)(prev_generation[j]))/((float)num_organisms));

	    }


            cur_genotype = 0;

            // Gen an a gene from one of the parents
            //(flip a coin)
            cur_rand = rand();
            if((char)cur_rand & 0x01)
	    {
	       cur_genotype += (parent_one%3);
	    }
            else
	    {
	       cur_genotype += (parent_two%3);
	    }

            // Gen a b gene from one of the parents
            //(flip a coin)
            if((char)cur_rand & 0x02)
	    {
	       cur_genotype += ((parent_one/3)%3) * 3;
	    }
            else
	    {
	       cur_genotype += ((parent_two/3)%3) * 3;
	    }

            cur_generation[cur_genotype]++;
	 }

         // Make the output (once for every "OUTPUT PERIOD")
         if(!(m%OUTPUT_PERIOD))
	 {
	    printf(".");fflush(NULL);
            fprintf(output_file,"%d",m);fflush(NULL);
            for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
	    {
 	       fprintf(output_file,"\t%d",cur_generation[i]);fflush(NULL);
	    }
            fprintf(output_file,"\n");
	 }

         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
         {
	    prev_generation[i] = cur_generation[i];
         }
      }

      // Do the output for the final generation (only if we haven't already)
      if(m%OUTPUT_PERIOD)
      {
	 printf(".");fflush(NULL);
         fprintf(output_file,"%d",m);fflush(NULL);
         for(i=0;i<NUM_HAPLOID_GENOTYPES;i++)
	 {
            fprintf(output_file,"\t%d",cur_generation[i]);fflush(NULL);
	 }
         fprintf(output_file,"\n");
      }

      fclose(output_file);
}
