00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00043 #include "fastlib/fastlib.h"
00044 #include "support.h"
00045 #include "discreteHMM.h"
00046 #include "gaussianHMM.h"
00047 #include "mixgaussHMM.h"
00048 #include "mixtureDST.h"
00049
00050 using namespace hmm_support;
00051
00052 success_t generate_discrete();
00053 success_t generate_gaussian();
00054 success_t generate_mixture();
00055 void usage();
00056
00057 const fx_entry_doc hmm_generate_main_entries[] = {
00058 {"type", FX_REQUIRED, FX_STR, NULL,
00059 " HMM type : discrete | gaussian | mixture.\n"},
00060 {"profile", FX_REQUIRED, FX_STR, NULL,
00061 " A file containing HMM profile.\n"},
00062 {"length", FX_PARAM, FX_INT, NULL,
00063 " Sequence length, default = 10.\n"},
00064 {"lenmax", FX_PARAM, FX_INT, NULL,
00065 " Maximum sequence length, default = length\n"},
00066 {"numseq", FX_PARAM, FX_INT, NULL,
00067 " Number of sequance, default = 10.\n"},
00068 {"seqfile", FX_PARAM, FX_STR, NULL,
00069 " Output file for the generated sequences.\n"},
00070 {"statefile", FX_PARAM, FX_STR, NULL,
00071 " Output file for the generated state sequences.\n"},
00072 FX_ENTRY_DOC_DONE
00073 };
00074
00075 const fx_submodule_doc hmm_generate_main_submodules[] = {
00076 FX_SUBMODULE_DOC_DONE
00077 };
00078
00079 const fx_module_doc hmm_generate_main_doc = {
00080 hmm_generate_main_entries, hmm_generate_main_submodules,
00081 "This is a program generating sequences from HMM models.\n"
00082 };
00083
00084 int main(int argc, char* argv[]) {
00085 fx_init(argc, argv, &hmm_generate_main_doc );
00086 success_t s = SUCCESS_PASS;
00087 if (fx_param_exists(NULL,"type")) {
00088 const char* type = fx_param_str_req(NULL, "type");
00089 if (strcmp(type, "discrete")==0)
00090 s = generate_discrete();
00091 else if (strcmp(type, "gaussian")==0)
00092 s = generate_gaussian();
00093 else if (strcmp(type, "mixture")==0)
00094 s = generate_mixture();
00095 else {
00096 printf("Unrecognized type: must be: discrete | gaussian | mixture !!!\n");
00097 return SUCCESS_PASS;
00098 }
00099 }
00100 else {
00101 printf("Unrecognized type: must be: discrete | gaussian | mixture !!!\n");
00102 s = SUCCESS_FAIL;
00103 }
00104 if (!PASSED(s)) usage();
00105
00106 fx_done(NULL);
00107 }
00108
00109 void usage() {
00110 printf("\nUsage:\n");
00111 printf(" generate --type=={discrete|gaussian|mixture} OPTIONS\n");
00112 printf("[OPTIONS]\n");
00113 printf(" --profile=file : file contains HMM profile\n");
00114 printf(" --length=NUM : sequence length\n");
00115 printf(" --lenmax=NUM : maximum sequence length, default = length\n");
00116 printf(" --numseq=NUM : number of sequence\n");
00117 printf(" --seqfile=file : output file for generated sequences\n");
00118 printf(" --statefile=file : output file for generated state sequences\n");
00119 }
00120
00121 success_t generate_mixture() {
00122 if (!fx_param_exists(NULL, "profile")) {
00123 printf("--profile must be defined.\n");
00124 return SUCCESS_FAIL;
00125 }
00126 const char* profile = fx_param_str_req(NULL, "profile");
00127 const int seqlen = fx_param_int(NULL, "length", 10);
00128 const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00129 const int numseq = fx_param_int(NULL, "numseq", 10);
00130 const char* seqout = fx_param_str(NULL, "seqfile", "seq.mix.out");
00131 const char* stateout = fx_param_str(NULL, "statefile", "state.mix.out");
00132
00133 DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00134 DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00135
00136 double step = (double) (seqlmax-seqlen) / numseq;
00137
00138 MixtureofGaussianHMM hmm;
00139 hmm.InitFromFile(profile);
00140
00141 TextWriter w_seq, w_state;
00142 if (!PASSED(w_seq.Open(seqout))) {
00143 NONFATAL("Couldn't open '%s' for writing.", seqout);
00144 return SUCCESS_FAIL;
00145 }
00146
00147 if (!PASSED(w_state.Open(stateout))) {
00148 NONFATAL("Couldn't open '%s' for writing.", stateout);
00149 return SUCCESS_FAIL;
00150 }
00151
00152 double L = seqlen;
00153 for (int i = 0; i < numseq; i++, L+=step) {
00154 Matrix seq;
00155 Vector states;
00156 char s[100];
00157
00158 hmm.GenerateSequence((int)L, &seq, &states);
00159
00160 sprintf(s, "%% sequence %d", i);
00161 print_matrix(w_seq, seq, s, "%E,");
00162 sprintf(s, "%% state sequence %d", i);
00163 print_vector(w_state, states, s, "%.0f,");
00164 }
00165
00166
00167 return SUCCESS_PASS;
00168 }
00169
00170 success_t generate_gaussian() {
00171 if (!fx_param_exists(NULL, "profile")) {
00172 printf("--profile must be defined.\n");
00173 return SUCCESS_FAIL;
00174 }
00175 const char* profile = fx_param_str_req(NULL, "profile");
00176 const int seqlen = fx_param_int(NULL, "length", 10);
00177 const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00178 const int numseq = fx_param_int(NULL, "numseq", 10);
00179 const char* seqout = fx_param_str(NULL, "seqfile", "seq.gauss.out");
00180 const char* stateout = fx_param_str(NULL, "statefile", "state.gauss.out");
00181
00182 DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00183 DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00184
00185 double step = (double) (seqlmax-seqlen) / numseq;
00186
00187 GaussianHMM hmm;
00188 hmm.InitFromFile(profile);
00189
00190 TextWriter w_seq, w_state;
00191 if (!PASSED(w_seq.Open(seqout))) {
00192 NONFATAL("Couldn't open '%s' for writing.", seqout);
00193 return SUCCESS_FAIL;
00194 }
00195
00196 if (!PASSED(w_state.Open(stateout))) {
00197 NONFATAL("Couldn't open '%s' for writing.", stateout);
00198 return SUCCESS_FAIL;
00199 }
00200
00201 double L = seqlen;
00202 for (int i = 0; i < numseq; i++, L+=step) {
00203 Matrix seq;
00204 Vector states;
00205 char s[100];
00206
00207 hmm.GenerateSequence((int)L, &seq, &states);
00208
00209 sprintf(s, "%% sequence %d", i);
00210 print_matrix(w_seq, seq, s, "%E,");
00211 sprintf(s, "%% state sequence %d", i);
00212 print_vector(w_state, states, s, "%.0f,");
00213 }
00214 return SUCCESS_PASS;
00215 }
00216
00217 success_t generate_discrete() {
00218 if (!fx_param_exists(NULL, "profile")) {
00219 printf("--profile must be defined.\n");
00220 return SUCCESS_FAIL;
00221 }
00222 const char* profile = fx_param_str_req(NULL, "profile");
00223 const int seqlen = fx_param_int(NULL, "length", 10);
00224 const int seqlmax = fx_param_int(NULL, "lenmax", seqlen);
00225 const int numseq = fx_param_int(NULL, "numseq", 10);
00226 const char* seqout = fx_param_str(NULL, "seqfile", "seq.out");
00227 const char* stateout = fx_param_str(NULL, "statefile", "state.out");
00228
00229 DEBUG_ASSERT_MSG(seqlen <= seqlmax, "LENMAX must bigger than LENGTH");
00230 DEBUG_ASSERT_MSG(numseq > 0, "NUMSEQ must be positive");
00231
00232 double step = (double) (seqlmax-seqlen) / numseq;
00233
00234 DiscreteHMM hmm;
00235 hmm.InitFromFile(profile);
00236
00237 TextWriter w_seq, w_state;
00238 if (!PASSED(w_seq.Open(seqout))) {
00239 NONFATAL("Couldn't open '%s' for writing.", seqout);
00240 return SUCCESS_FAIL;
00241 }
00242
00243 if (!PASSED(w_state.Open(stateout))) {
00244 NONFATAL("Couldn't open '%s' for writing.", stateout);
00245 return SUCCESS_FAIL;
00246 }
00247
00248 double L = seqlen;
00249 for (int i = 0; i < numseq; i++, L+=step) {
00250 Vector seq, states;
00251 char s[100];
00252
00253 hmm.GenerateSequence((int)L, &seq, &states);
00254
00255 sprintf(s, "%% sequence %d", i);
00256 print_vector(w_seq, seq, s, "%.0f,");
00257 sprintf(s, "%% state sequence %d", i);
00258 print_vector(w_state, states, s, "%.0f,");
00259 }
00260 return SUCCESS_PASS;
00261 }