00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00043 #include "fastlib/fastlib.h"
00044 #include "support.h"
00045 #include "discreteHMM.h"
00046 #include "gaussianHMM.h"
00047 #include "mixgaussHMM.h"
00048 #include "mixtureDST.h"
00049 
00050 using namespace hmm_support;
00051 
00052 success_t generate_discrete();
00053 success_t generate_gaussian();
00054 success_t generate_mixture();
00055 void usage();
00056 
00057 const fx_entry_doc hmm_generate_main_entries[] = {
00058   {"type", FX_REQUIRED, FX_STR, NULL,
00059    "  HMM type : discrete | gaussian | mixture.\n"},
00060   {"profile", FX_REQUIRED, FX_STR, NULL,
00061    "  A file containing HMM profile.\n"},
00062   {"length", FX_PARAM, FX_INT, NULL,
00063    "  Sequence length, default = 10.\n"},
00064   {"lenmax", FX_PARAM, FX_INT, NULL,
00065    "  Maximum sequence length, default = length\n"},
00066   {"numseq", FX_PARAM, FX_INT, NULL,
00067    "  Number of sequance, default = 10.\n"},
00068   {"seqfile", FX_PARAM, FX_STR, NULL,
00069    "  Output file for the generated sequences.\n"},
00070   {"statefile", FX_PARAM, FX_STR, NULL,
00071    "  Output file for the generated state sequences.\n"},
00072   FX_ENTRY_DOC_DONE
00073 };
00074 
00075 const fx_submodule_doc hmm_generate_main_submodules[] = {
00076   FX_SUBMODULE_DOC_DONE
00077 };
00078 
00079 const fx_module_doc hmm_generate_main_doc = {
00080   hmm_generate_main_entries, hmm_generate_main_submodules,
00081   "This is a program generating sequences from HMM models.\n"
00082 };
00083 
00084 int main(int argc, char* argv[]) {
00085   fx_init(argc, argv, &hmm_generate_main_doc );
00086   success_t s = SUCCESS_PASS;
00087   if (fx_param_exists(NULL,"type")) {
00088     const char* type = fx_param_str_req(NULL, "type");
00089     if (strcmp(type, "discrete")==0)
00090       s = generate_discrete();
00091     else if (strcmp(type, "gaussian")==0) 
00092       s = generate_gaussian();
00093     else if (strcmp(type, "mixture")==0)
00094       s = generate_mixture();
00095     else {
00096       printf("Unrecognized type: must be: discrete | gaussian | mixture !!!\n");
00097       return SUCCESS_PASS;
00098     }
00099   }
00100   else {
00101     printf("Unrecognized type: must be: discrete | gaussian | mixture  !!!\n");
00102     s = SUCCESS_FAIL;
00103   }
00104   if (!PASSED(s)) usage();
00105 
00106   fx_done(NULL);
00107 }
00108 
00109 void usage() {
00110   printf("\nUsage:\n");
00111   printf("  generate --type=={discrete|gaussian|mixture} OPTIONS\n");
00112   printf("[OPTIONS]\n");
00113   printf("  --profile=file   : file contains HMM profile\n");
00114   printf("  --length=NUM     : sequence length\n");
00115   printf("  --lenmax=NUM     : maximum sequence length, default = length\n");
00116   printf("  --numseq=NUM     : number of sequence\n");
00117   printf("  --seqfile=file   : output file for generated sequences\n");
00118   printf("  --statefile=file : output file for generated state sequences\n");
00119 }
00120 
00121 success_t generate_mixture() {
00122   if (!fx_param_exists(NULL, "profile")) {
00123     printf("--profile must be defined.\n");
00124     return SUCCESS_FAIL;
00125   }
00126   const char* profile = fx_param_str_req(NULL, "profile");
00127   const int seqlen = fx_param_int(NULL, "length", 10);
00128   const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00129   const int numseq = fx_param_int(NULL, "numseq", 10);
00130   const char* seqout = fx_param_str(NULL, "seqfile", "seq.mix.out");
00131   const char* stateout = fx_param_str(NULL, "statefile", "state.mix.out");
00132 
00133   DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00134   DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00135 
00136   double step = (double) (seqlmax-seqlen) / numseq;
00137 
00138   MixtureofGaussianHMM hmm;
00139   hmm.InitFromFile(profile);
00140   
00141   TextWriter w_seq, w_state;
00142   if (!PASSED(w_seq.Open(seqout))) {
00143     NONFATAL("Couldn't open '%s' for writing.", seqout);
00144     return SUCCESS_FAIL;
00145   }
00146 
00147   if (!PASSED(w_state.Open(stateout))) {
00148     NONFATAL("Couldn't open '%s' for writing.", stateout);
00149     return SUCCESS_FAIL;
00150   }
00151 
00152   double L = seqlen;
00153   for (int i = 0; i < numseq; i++, L+=step) {
00154     Matrix seq;
00155     Vector states;
00156     char s[100];
00157 
00158     hmm.GenerateSequence((int)L, &seq, &states);
00159     
00160     sprintf(s, "%% sequence %d", i);
00161     print_matrix(w_seq, seq, s, "%E,");    
00162     sprintf(s, "%% state sequence %d", i);
00163     print_vector(w_state, states, s, "%.0f,");    
00164   }
00165 
00166   
00167   return SUCCESS_PASS;
00168 }
00169 
00170 success_t generate_gaussian() {
00171   if (!fx_param_exists(NULL, "profile")) {
00172     printf("--profile must be defined.\n");
00173     return SUCCESS_FAIL;
00174   }
00175   const char* profile = fx_param_str_req(NULL, "profile");
00176   const int seqlen = fx_param_int(NULL, "length", 10);
00177   const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00178   const int numseq = fx_param_int(NULL, "numseq", 10);
00179   const char* seqout = fx_param_str(NULL, "seqfile", "seq.gauss.out");
00180   const char* stateout = fx_param_str(NULL, "statefile", "state.gauss.out");
00181 
00182   DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00183   DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00184 
00185   double step = (double) (seqlmax-seqlen) / numseq;
00186 
00187   GaussianHMM hmm;
00188   hmm.InitFromFile(profile);
00189   
00190   TextWriter w_seq, w_state;
00191   if (!PASSED(w_seq.Open(seqout))) {
00192     NONFATAL("Couldn't open '%s' for writing.", seqout);
00193     return SUCCESS_FAIL;
00194   }
00195 
00196   if (!PASSED(w_state.Open(stateout))) {
00197     NONFATAL("Couldn't open '%s' for writing.", stateout);
00198     return SUCCESS_FAIL;
00199   }
00200 
00201   double L = seqlen;
00202   for (int i = 0; i < numseq; i++, L+=step) {
00203     Matrix seq;
00204     Vector states;
00205     char s[100];
00206 
00207     hmm.GenerateSequence((int)L, &seq, &states);
00208     
00209     sprintf(s, "%% sequence %d", i);
00210     print_matrix(w_seq, seq, s, "%E,");    
00211     sprintf(s, "%% state sequence %d", i);
00212     print_vector(w_state, states, s, "%.0f,");    
00213   }
00214   return SUCCESS_PASS;
00215 }
00216 
00217 success_t generate_discrete() {
00218   if (!fx_param_exists(NULL, "profile")) {
00219     printf("--profile must be defined.\n");
00220     return SUCCESS_FAIL;
00221   }
00222   const char* profile = fx_param_str_req(NULL, "profile");
00223   const int seqlen = fx_param_int(NULL, "length", 10);
00224   const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00225   const int numseq = fx_param_int(NULL, "numseq", 10);
00226   const char* seqout = fx_param_str(NULL, "seqfile", "seq.out");
00227   const char* stateout = fx_param_str(NULL, "statefile", "state.out");
00228 
00229   DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00230   DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00231 
00232   double step = (double) (seqlmax-seqlen) / numseq;
00233 
00234   DiscreteHMM hmm;
00235   hmm.InitFromFile(profile);
00236 
00237   TextWriter w_seq, w_state;
00238   if (!PASSED(w_seq.Open(seqout))) {
00239     NONFATAL("Couldn't open '%s' for writing.", seqout);
00240     return SUCCESS_FAIL;
00241   }
00242 
00243   if (!PASSED(w_state.Open(stateout))) {
00244     NONFATAL("Couldn't open '%s' for writing.", stateout);
00245     return SUCCESS_FAIL;
00246   }
00247 
00248   double L = seqlen;
00249   for (int i = 0; i < numseq; i++, L+=step) {
00250     Vector seq, states;
00251     char s[100];
00252 
00253     hmm.GenerateSequence((int)L, &seq, &states);
00254     
00255     sprintf(s, "%% sequence %d", i);
00256     print_vector(w_seq, seq, s, "%.0f,");    
00257     sprintf(s, "%% state sequence %d", i);
00258     print_vector(w_state, states, s, "%.0f,");    
00259   }
00260   return SUCCESS_PASS;
00261 }