00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00040 #ifndef MOG_EM_H
00041 #define MOG_EM_H
00042
00043 #include <fastlib/fastlib.h>
00044
00045 const fx_entry_doc mog_em_entries[] = {
00046 {"K", FX_PARAM, FX_INT, NULL,
00047 " The number of Gaussians in the mixture model."
00048 " (defaults to 1)\n"},
00049 {"D", FX_RESERVED, FX_INT, NULL,
00050 " The number of dimensions of the data on which the"
00051 " the mixture model is to be fit.\n"},
00052 {"model_init", FX_TIMER, FX_CUSTOM, NULL,
00053 " The time required to initialize the mixture model.\n"},
00054 {"EM", FX_TIMER, FX_CUSTOM, NULL,
00055 " The time required for the EM algorithm to obtain"
00056 " the parameter values.\n"},
00057 FX_ENTRY_DOC_DONE
00058 };
00059
00060 const fx_module_doc mog_em_doc = {
00061 mog_em_entries, NULL,
00062 " This program defines a Gaussian mixture model"
00063 " and estimates the parameters using maximum"
00064 " likelihood via the EM algorithm.\n"
00065 };
00066
00085 class MoGEM {
00086
00087 private:
00088
00089
00090 ArrayList<Vector> mu_;
00091 ArrayList<Matrix> sigma_;
00092 Vector omega_;
00093 index_t number_of_gaussians_;
00094 index_t dimension_;
00095
00096 public:
00097
00098 MoGEM() {
00099 mu_.Init(0);
00100 sigma_.Init(0);
00101 }
00102
00103 ~MoGEM() {
00104 }
00105
00106 void Init(index_t num_gauss, index_t dimension) {
00107
00108
00109 number_of_gaussians_ = num_gauss;
00110 dimension_ = dimension;
00111
00112
00113 mu_.Resize(number_of_gaussians_);
00114 sigma_.Resize(number_of_gaussians_);
00115 }
00116
00117 void Init(datanode *mog_em_module) {
00118
00119 index_t num_gauss = fx_param_int_req(mog_em_module, "K");
00120 index_t dim = fx_param_int_req(mog_em_module, "D");
00121 Init(num_gauss, dim);
00122 }
00123
00124
00125 ArrayList<Vector>& mu() {
00126 return mu_;
00127 }
00128
00129 ArrayList<Matrix>& sigma() {
00130 return sigma_;
00131 }
00132
00133 Vector& omega() {
00134 return omega_;
00135 }
00136
00137 index_t number_of_gaussians() {
00138 return number_of_gaussians_;
00139 }
00140
00141 index_t dimension() {
00142 return dimension_;
00143 }
00144
00145 Vector& mu(index_t i) {
00146 return mu_[i] ;
00147 }
00148
00149 Matrix& sigma(index_t i) {
00150 return sigma_[i];
00151 }
00152
00153 double omega(index_t i) {
00154 return omega_.get(i);
00155 }
00156
00157
00158
00159 void set_mu(index_t i, Vector& mu) {
00160 DEBUG_ASSERT(i < number_of_gaussians());
00161 DEBUG_ASSERT(mu.length() == dimension());
00162 mu_[i].Copy(mu);
00163 return;
00164 }
00165
00166 void set_mu(index_t i, index_t length, const double *mu) {
00167 DEBUG_ASSERT(i < number_of_gaussians());
00168 DEBUG_ASSERT(length == dimension());
00169 mu_[i].Copy(mu, length);
00170 return;
00171 }
00172
00173 void set_sigma(index_t i, Matrix& sigma) {
00174 DEBUG_ASSERT(i < number_of_gaussians());
00175 DEBUG_ASSERT(sigma.n_rows() == dimension());
00176 DEBUG_ASSERT(sigma.n_cols() == dimension());
00177 sigma_[i].Copy(sigma);
00178 return;
00179 }
00180
00181 void set_omega(Vector& omega) {
00182 DEBUG_ASSERT(omega.length() == number_of_gaussians());
00183 omega_.Copy(omega);
00184 return;
00185 }
00186
00187 void set_omega(index_t length, const double *omega) {
00188 DEBUG_ASSERT(length == number_of_gaussians());
00189 omega_.Copy(omega, length);
00190 return;
00191 }
00192
00193
00203 void OutputResults(ArrayList<double> *results) {
00204
00205
00206 (*results).Init(number_of_gaussians_ * (1 + dimension_*(1 + dimension_)));
00207
00208
00209 for (index_t i = 0; i < number_of_gaussians_; i++) {
00210 (*results)[i] = omega(i);
00211 for (index_t j = 0; j < dimension_; j++) {
00212 (*results)[number_of_gaussians_ + i*dimension_ + j] = mu(i).get(j);
00213 for (index_t k = 0; k < dimension_; k++) {
00214 (*results)[number_of_gaussians_*(1 + dimension_)
00215 + i*dimension_*dimension_ + j*dimension_
00216 + k] = sigma(i).get(j, k);
00217 }
00218 }
00219 }
00220 }
00221
00229 void Display(){
00230
00231
00232 printf(" Omega : [ ");
00233 for (index_t i = 0; i < number_of_gaussians_; i++) {
00234 printf("%lf ", omega(i));
00235 }
00236 printf("]\n");
00237 printf(" Mu : \n[");
00238 for (index_t i = 0; i < number_of_gaussians_; i++) {
00239 for (index_t j = 0; j < dimension_ ; j++) {
00240 printf("%lf ", mu(i).get(j));
00241 }
00242 printf(";");
00243 if (i == (number_of_gaussians_ - 1)) {
00244 printf("\b]\n");
00245 }
00246 }
00247 printf("Sigma : ");
00248 for (index_t i = 0; i < number_of_gaussians_; i++) {
00249 printf("\n[");
00250 for (index_t j = 0; j < dimension_ ; j++) {
00251 for(index_t k = 0; k < dimension_ ; k++) {
00252 printf("%lf ",sigma(i).get(j, k));
00253 }
00254 printf(";");
00255 }
00256 printf("\b]");
00257 }
00258 printf("\n");
00259 }
00260
00273 void ExpectationMaximization(Matrix& data_points);
00274
00281 long double Loglikelihood(Matrix& data_points, ArrayList<Vector>& means,
00282 ArrayList<Matrix>& covars, Vector& weights);
00283
00293 void KMeans(Matrix& data, ArrayList<Vector> *means,
00294 ArrayList<Matrix> *covars, Vector *weights, index_t value_of_k);
00295 };
00296
00297 #endif