generate.cc

Go to the documentation of this file.
00001 /* MLPACK 0.2
00002  *
00003  * Copyright (c) 2008, 2009 Alexander Gray,
00004  *                          Garry Boyer,
00005  *                          Ryan Riegel,
00006  *                          Nikolaos Vasiloglou,
00007  *                          Dongryeol Lee,
00008  *                          Chip Mappus, 
00009  *                          Nishant Mehta,
00010  *                          Hua Ouyang,
00011  *                          Parikshit Ram,
00012  *                          Long Tran,
00013  *                          Wee Chin Wong
00014  *
00015  * Copyright (c) 2008, 2009 Georgia Institute of Technology
00016  *
00017  * This program is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU General Public License as
00019  * published by the Free Software Foundation; either version 2 of the
00020  * License, or (at your option) any later version.
00021  *
00022  * This program is distributed in the hope that it will be useful, but
00023  * WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU General Public License
00028  * along with this program; if not, write to the Free Software
00029  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00030  * 02110-1301, USA.
00031  */
00043 #include "fastlib/fastlib.h"
00044 #include "support.h"
00045 #include "discreteHMM.h"
00046 #include "gaussianHMM.h"
00047 #include "mixgaussHMM.h"
00048 #include "mixtureDST.h"
00049 
00050 using namespace hmm_support;
00051 
00052 success_t generate_discrete();
00053 success_t generate_gaussian();
00054 success_t generate_mixture();
00055 void usage();
00056 
00057 const fx_entry_doc hmm_generate_main_entries[] = {
00058   {"type", FX_REQUIRED, FX_STR, NULL,
00059    "  HMM type : discrete | gaussian | mixture.\n"},
00060   {"profile", FX_REQUIRED, FX_STR, NULL,
00061    "  A file containing HMM profile.\n"},
00062   {"length", FX_PARAM, FX_INT, NULL,
00063    "  Sequence length, default = 10.\n"},
00064   {"lenmax", FX_PARAM, FX_INT, NULL,
00065    "  Maximum sequence length, default = length\n"},
00066   {"numseq", FX_PARAM, FX_INT, NULL,
00067    "  Number of sequance, default = 10.\n"},
00068   {"seqfile", FX_PARAM, FX_STR, NULL,
00069    "  Output file for the generated sequences.\n"},
00070   {"statefile", FX_PARAM, FX_STR, NULL,
00071    "  Output file for the generated state sequences.\n"},
00072   FX_ENTRY_DOC_DONE
00073 };
00074 
00075 const fx_submodule_doc hmm_generate_main_submodules[] = {
00076   FX_SUBMODULE_DOC_DONE
00077 };
00078 
00079 const fx_module_doc hmm_generate_main_doc = {
00080   hmm_generate_main_entries, hmm_generate_main_submodules,
00081   "This is a program generating sequences from HMM models.\n"
00082 };
00083 
00084 int main(int argc, char* argv[]) {
00085   fx_init(argc, argv, &hmm_generate_main_doc );
00086   success_t s = SUCCESS_PASS;
00087   if (fx_param_exists(NULL,"type")) {
00088     const char* type = fx_param_str_req(NULL, "type");
00089     if (strcmp(type, "discrete")==0)
00090       s = generate_discrete();
00091     else if (strcmp(type, "gaussian")==0) 
00092       s = generate_gaussian();
00093     else if (strcmp(type, "mixture")==0)
00094       s = generate_mixture();
00095     else {
00096       printf("Unrecognized type: must be: discrete | gaussian | mixture !!!\n");
00097       return SUCCESS_PASS;
00098     }
00099   }
00100   else {
00101     printf("Unrecognized type: must be: discrete | gaussian | mixture  !!!\n");
00102     s = SUCCESS_FAIL;
00103   }
00104   if (!PASSED(s)) usage();
00105 
00106   fx_done(NULL);
00107 }
00108 
00109 void usage() {
00110   printf("\nUsage:\n");
00111   printf("  generate --type=={discrete|gaussian|mixture} OPTIONS\n");
00112   printf("[OPTIONS]\n");
00113   printf("  --profile=file   : file contains HMM profile\n");
00114   printf("  --length=NUM     : sequence length\n");
00115   printf("  --lenmax=NUM     : maximum sequence length, default = length\n");
00116   printf("  --numseq=NUM     : number of sequence\n");
00117   printf("  --seqfile=file   : output file for generated sequences\n");
00118   printf("  --statefile=file : output file for generated state sequences\n");
00119 }
00120 
00121 success_t generate_mixture() {
00122   if (!fx_param_exists(NULL, "profile")) {
00123     printf("--profile must be defined.\n");
00124     return SUCCESS_FAIL;
00125   }
00126   const char* profile = fx_param_str_req(NULL, "profile");
00127   const int seqlen = fx_param_int(NULL, "length", 10);
00128   const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00129   const int numseq = fx_param_int(NULL, "numseq", 10);
00130   const char* seqout = fx_param_str(NULL, "seqfile", "seq.mix.out");
00131   const char* stateout = fx_param_str(NULL, "statefile", "state.mix.out");
00132 
00133   DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00134   DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00135 
00136   double step = (double) (seqlmax-seqlen) / numseq;
00137 
00138   MixtureofGaussianHMM hmm;
00139   hmm.InitFromFile(profile);
00140   
00141   TextWriter w_seq, w_state;
00142   if (!PASSED(w_seq.Open(seqout))) {
00143     NONFATAL("Couldn't open '%s' for writing.", seqout);
00144     return SUCCESS_FAIL;
00145   }
00146 
00147   if (!PASSED(w_state.Open(stateout))) {
00148     NONFATAL("Couldn't open '%s' for writing.", stateout);
00149     return SUCCESS_FAIL;
00150   }
00151 
00152   double L = seqlen;
00153   for (int i = 0; i < numseq; i++, L+=step) {
00154     Matrix seq;
00155     Vector states;
00156     char s[100];
00157 
00158     hmm.GenerateSequence((int)L, &seq, &states);
00159     
00160     sprintf(s, "%% sequence %d", i);
00161     print_matrix(w_seq, seq, s, "%E,");    
00162     sprintf(s, "%% state sequence %d", i);
00163     print_vector(w_state, states, s, "%.0f,");    
00164   }
00165 
00166   //printf("---END---");
00167   return SUCCESS_PASS;
00168 }
00169 
00170 success_t generate_gaussian() {
00171   if (!fx_param_exists(NULL, "profile")) {
00172     printf("--profile must be defined.\n");
00173     return SUCCESS_FAIL;
00174   }
00175   const char* profile = fx_param_str_req(NULL, "profile");
00176   const int seqlen = fx_param_int(NULL, "length", 10);
00177   const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00178   const int numseq = fx_param_int(NULL, "numseq", 10);
00179   const char* seqout = fx_param_str(NULL, "seqfile", "seq.gauss.out");
00180   const char* stateout = fx_param_str(NULL, "statefile", "state.gauss.out");
00181 
00182   DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00183   DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00184 
00185   double step = (double) (seqlmax-seqlen) / numseq;
00186 
00187   GaussianHMM hmm;
00188   hmm.InitFromFile(profile);
00189   
00190   TextWriter w_seq, w_state;
00191   if (!PASSED(w_seq.Open(seqout))) {
00192     NONFATAL("Couldn't open '%s' for writing.", seqout);
00193     return SUCCESS_FAIL;
00194   }
00195 
00196   if (!PASSED(w_state.Open(stateout))) {
00197     NONFATAL("Couldn't open '%s' for writing.", stateout);
00198     return SUCCESS_FAIL;
00199   }
00200 
00201   double L = seqlen;
00202   for (int i = 0; i < numseq; i++, L+=step) {
00203     Matrix seq;
00204     Vector states;
00205     char s[100];
00206 
00207     hmm.GenerateSequence((int)L, &seq, &states);
00208     
00209     sprintf(s, "%% sequence %d", i);
00210     print_matrix(w_seq, seq, s, "%E,");    
00211     sprintf(s, "%% state sequence %d", i);
00212     print_vector(w_state, states, s, "%.0f,");    
00213   }
00214   return SUCCESS_PASS;
00215 }
00216 
00217 success_t generate_discrete() {
00218   if (!fx_param_exists(NULL, "profile")) {
00219     printf("--profile must be defined.\n");
00220     return SUCCESS_FAIL;
00221   }
00222   const char* profile = fx_param_str_req(NULL, "profile");
00223   const int seqlen = fx_param_int(NULL, "length", 10);
00224   const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00225   const int numseq = fx_param_int(NULL, "numseq", 10);
00226   const char* seqout = fx_param_str(NULL, "seqfile", "seq.out");
00227   const char* stateout = fx_param_str(NULL, "statefile", "state.out");
00228 
00229   DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00230   DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00231 
00232   double step = (double) (seqlmax-seqlen) / numseq;
00233 
00234   DiscreteHMM hmm;
00235   hmm.InitFromFile(profile);
00236 
00237   TextWriter w_seq, w_state;
00238   if (!PASSED(w_seq.Open(seqout))) {
00239     NONFATAL("Couldn't open '%s' for writing.", seqout);
00240     return SUCCESS_FAIL;
00241   }
00242 
00243   if (!PASSED(w_state.Open(stateout))) {
00244     NONFATAL("Couldn't open '%s' for writing.", stateout);
00245     return SUCCESS_FAIL;
00246   }
00247 
00248   double L = seqlen;
00249   for (int i = 0; i < numseq; i++, L+=step) {
00250     Vector seq, states;
00251     char s[100];
00252 
00253     hmm.GenerateSequence((int)L, &seq, &states);
00254     
00255     sprintf(s, "%% sequence %d", i);
00256     print_vector(w_seq, seq, s, "%.0f,");    
00257     sprintf(s, "%% state sequence %d", i);
00258     print_vector(w_state, states, s, "%.0f,");    
00259   }
00260   return SUCCESS_PASS;
00261 }
Generated on Mon Jan 24 12:04:38 2011 for FASTlib by  doxygen 1.6.3