IT++ Logo

mog_diag_kmeans.cpp

Go to the documentation of this file.
00001 
00031 #include <itpp/stat/mog_diag_kmeans.h>
00032 #include <iostream>
00033 
00034 namespace itpp
00035 {
00036 
00037 inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const
00038 {
00039   double acc = 0.0;
00040   for (int d = 0;d < D;d++) { double tmp = x[d] - y[d]; acc += tmp * tmp; }
00041   return(acc);
00042 }
00043 
00044 void MOG_diag_kmeans_sup::assign_to_means()
00045 {
00046 
00047   for (int k = 0;k < K;k++) c_count[k] = 0;
00048 
00049   for (int n = 0;n < N;n++) {
00050 
00051     int k = 0;
00052     double min_dist = dist(c_means[k], c_X[n]);
00053     int k_winner = k;
00054 
00055     for (int k = 1;k < K;k++) {
00056       double tmp_dist = dist(c_means[k], c_X[n]);
00057       if (tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; }
00058     }
00059 
00060     c_partitions[ k_winner ][ count[k_winner] ] = n;
00061     c_count[k_winner]++;
00062   }
00063 }
00064 
00065 
00066 void MOG_diag_kmeans_sup::recalculate_means()
00067 {
00068 
00069   for (int k = 0;k < K;k++) {
00070 
00071     for (int d = 0;d < D;d++)  c_tmpvec[d] = 0.0;
00072 
00073     int Nk = c_count[k];
00074 
00075     for (int n = 0;n < Nk;n++) {
00076       double * x = c_X[ c_partitions[k][n] ];
00077       for (int d = 0;d < D;d++)  c_tmpvec[d] += x[d];
00078     }
00079 
00080     if (Nk > 0) {
00081       double * c_mean = c_means[k];
00082       for (int d = 0;d < D;d++)  c_mean[d] = c_tmpvec[d] / Nk;
00083     }
00084   }
00085 
00086 }
00087 
00088 
00089 bool MOG_diag_kmeans_sup::dezombify_means()
00090 {
00091 
00092   static int counter = 0;
00093 
00094   bool zombie_mean = false;
00095 
00096   int k = 0;
00097   int max_count = count[k];
00098   int k_hog = k;
00099 
00100   for (int k = 1;k < K;k++)  if (c_count[k] > max_count) { max_count = c_count[k]; k_hog = k; }
00101 
00102   for (int k = 0;k < K;k++) {
00103     if (c_count[k] == 0) {
00104 
00105       zombie_mean = true;
00106       if (verbose) {
00107         it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean");
00108       }
00109       if (k_hog == k) {
00110         it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k");
00111         return(false);
00112       }
00113 
00114       if (counter >= c_count[k_hog])  counter = 0;
00115 
00116       double * c_mean = c_means[k];
00117       double * c_x = c_X[ c_partitions[k_hog][counter] ];
00118 
00119       for (int d = 0;d < D;d++)  c_mean[d] = 0.5 * (c_means[k_hog][d] + c_x[d]);
00120       counter++;
00121     }
00122 
00123   }
00124 
00125   if (zombie_mean)  assign_to_means();
00126 
00127   return(true);
00128 }
00129 
00130 
00131 double MOG_diag_kmeans_sup::measure_change() const
00132 {
00133 
00134   double tmp_dist = 0.0;
00135   for (int k = 0;k < K;k++)   tmp_dist += dist(c_means[k], c_means_old[k]);
00136   return(tmp_dist);
00137 }
00138 
00139 
00140 void MOG_diag_kmeans_sup::initial_means()
00141 {
00142 
00143   for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
00144 
00145   for (int n = 0;n < N;n++) {
00146     double * c_x = c_X[n];
00147     for (int d = 0;d < D;d++)  c_tmpvec[d] += c_x[d];
00148   }
00149 
00150   for (int d = 0;d < D;d++) c_tmpvec[d] /= N;
00151 
00152   int step = int(floor(double(N) / double(K)));
00153   for (int k = 0;k < K;k++) {
00154     double * c_mean = c_means[k];
00155     double * c_x = c_X[k*step];
00156 
00157     for (int d = 0;d < D;d++)  c_mean[d] = 0.5 * (c_tmpvec[d] + c_x[d]);
00158   }
00159 }
00160 
00161 
00162 void MOG_diag_kmeans_sup::iterate()
00163 {
00164 
00165   for (int k = 0;k < K;k++)  for (int d = 0;d < D;d++)  c_means_old[k][d] = c_means[k][d];
00166 
00167   for (int i = 0;i < max_iter;i++) {
00168 
00169     assign_to_means();
00170     if (!dezombify_means()) return;
00171     recalculate_means();
00172 
00173     double change = measure_change();
00174 
00175     if (verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << "  change = " << change << std::endl;
00176     if (change == 0) break;
00177 
00178     for (int k = 0;k < K;k++)  for (int d = 0;d < D;d++)   c_means_old[k][d] = c_means[k][d];
00179   }
00180 
00181 }
00182 
00183 
00184 void MOG_diag_kmeans_sup::calc_means()
00185 {
00186   initial_means();
00187   iterate();
00188 }
00189 
00190 
00191 void MOG_diag_kmeans_sup::calc_covs()
00192 {
00193 
00194   for (int k = 0;k < K;k++) {
00195     int Nk = c_count[k];
00196 
00197     if (Nk >= 2) {
00198       double * c_mean = c_means[k];
00199 
00200       for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
00201 
00202       for (int n = 0;n < Nk;n++) {
00203         double * c_x = c_X[ c_partitions[k][n] ];
00204         for (int d = 0;d < D;d++) { double tmp = c_x[d] - c_mean[d];  c_tmpvec[d] += tmp * tmp; }
00205       }
00206 
00207       for (int d = 0;d < D;d++)  c_diag_covs[k][d] = trust * (c_tmpvec[d] / (Nk - 1.0)) + (1.0 - trust) * (1.0);
00208     }
00209     else {
00210       for (int d = 0;d < D;d++)  c_diag_covs[k][d] = 1.0;
00211     }
00212   }
00213 
00214 }
00215 
00216 
00217 void MOG_diag_kmeans_sup::calc_weights()
00218 {
00219   for (int k = 0;k < K;k++)  c_weights[k] = trust * (c_count[k] / double(N)) + (1.0 - trust) * (1.0 / K);
00220 }
00221 
00222 
00223 void MOG_diag_kmeans_sup::normalise_vectors()
00224 {
00225 
00226   for (int d = 0;d < D;d++) {
00227     double acc = 0.0;
00228     for (int n = 0;n < N;n++)  acc += c_X[n][d];
00229     c_norm_mu[d] = acc / N;
00230   }
00231 
00232   for (int d = 0;d < D;d++) {
00233     double acc = 0.0;
00234     for (int n = 0;n < N;n++) { double tmp = c_X[n][d] - c_norm_mu[d];  acc += tmp * tmp; }
00235     c_norm_sd[d] = std::sqrt(acc / (N - 1));
00236   }
00237 
00238   for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) {
00239       c_X[n][d] -= c_norm_mu[d];
00240       if (c_norm_sd[d] > 0.0)  c_X[n][d] /= c_norm_sd[d];
00241     }
00242 }
00243 
00244 
00245 void MOG_diag_kmeans_sup::unnormalise_vectors()
00246 {
00247 
00248   for (int n = 0;n < N;n++)  for (int d = 0;d < D;d++) {
00249       if (c_norm_sd[d] > 0.0)  c_X[n][d] *= c_norm_sd[d];
00250       c_X[n][d] += c_norm_mu[d];
00251     }
00252 }
00253 
00254 
00255 void MOG_diag_kmeans_sup::unnormalise_means()
00256 {
00257 
00258   for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) {
00259       if (norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d];
00260       c_means[k][d] += norm_mu[d];
00261     }
00262 }
00263 
00264 
00265 void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
00266 {
00267 
00268   it_assert(model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid");
00269   it_assert((max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero");
00270   it_assert(((trust_in >= 0.0) && (trust_in <= 1.0)), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)");
00271 
00272   verbose = verbose_in;
00273 
00274   Array<vec> means_in = model_in.get_means();
00275   Array<vec> diag_covs_in = model_in.get_diag_covs();
00276   vec weights_in = model_in.get_weights();
00277 
00278   init(means_in, diag_covs_in, weights_in);
00279 
00280   means_in.set_size(0);
00281   diag_covs_in.set_size(0);
00282   weights_in.set_size(0);
00283 
00284   it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality");
00285 
00286   N = X_in.size();
00287 
00288   if (K > N) {
00289     it_warning("MOG_diag_kmeans_sup::run(): K > N");
00290   }
00291   else {
00292     if (K > N / 10) {
00293       it_warning("MOG_diag_kmeans_sup::run(): K > N/10");
00294     }
00295   }
00296 
00297   max_iter = max_iter_in;
00298   trust = trust_in;
00299 
00300   means_old.set_size(K);
00301   for (int k = 0;k < K;k++) means_old(k).set_size(D);
00302   partitions.set_size(K);
00303   for (int k = 0;k < K;k++) partitions(k).set_size(N);
00304   count.set_size(K);
00305   tmpvec.set_size(D);
00306   norm_mu.set_size(D);
00307   norm_sd.set_size(D);
00308 
00309   c_X = enable_c_access(X_in);
00310   c_means_old = enable_c_access(means_old);
00311   c_partitions = enable_c_access(partitions);
00312   c_count = enable_c_access(count);
00313   c_tmpvec = enable_c_access(tmpvec);
00314   c_norm_mu = enable_c_access(norm_mu);
00315   c_norm_sd = enable_c_access(norm_sd);
00316 
00317   if (normalise_in)  normalise_vectors();
00318 
00319   calc_means();
00320   if (normalise_in) { unnormalise_vectors(); unnormalise_means(); }
00321   calc_covs();
00322   calc_weights();
00323 
00324   model_in.init(means, diag_covs, weights);
00325 
00326   disable_c_access(c_X);
00327   disable_c_access(c_means_old);
00328   disable_c_access(c_partitions);
00329   disable_c_access(c_count);
00330   disable_c_access(c_tmpvec);
00331   disable_c_access(c_norm_mu);
00332   disable_c_access(c_norm_sd);
00333 
00334   means_old.set_size(0);
00335   partitions.set_size(0);
00336   count.set_size(0);
00337   tmpvec.set_size(0);
00338   norm_mu.set_size(0);
00339   norm_sd.set_size(0);
00340 
00341   cleanup();
00342 
00343 }
00344 
00345 //
00346 // convenience functions
00347 
00348 void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
00349 {
00350   MOG_diag_kmeans_sup km;
00351   km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in);
00352 }
00353 
00354 
00355 }
00356 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SourceForge Logo

Generated on Sat Feb 26 2011 16:06:35 for IT++ by Doxygen 1.7.3