Main Page | Class List | File List | Class Members

discrete_dist.h

00001 /******************* 00002 Discrete distribution 00003 ********************/ 00004 #ifndef _DISCRETE_DIST_H_ 00005 #define _DISCRETE_DIST_H_ 00006 00007 #include <vector> 00008 #include <iostream> 00009 #include <math.h> 00010 #include "librf/types.h" 00011 00012 using namespace std; 00013 00014 namespace librf { 00015 00016 class DiscreteDist { 00017 public: 00018 DiscreteDist(int size = 2) : sum_(0), size_(size) 00019 { 00020 counter_ = new unsigned int[size]; 00021 for (int i =0; i < size; ++i) { 00022 counter_[i] = 0; 00023 } 00024 } 00025 ~DiscreteDist() { 00026 delete [] counter_; 00027 } 00028 void add(int value, unsigned int weight=1) { 00029 counter_[value] += weight; 00030 sum_ += weight; 00031 } 00032 void remove(int value, unsigned int weight=1) { 00033 counter_[value] -= weight; 00034 sum_ -= weight; 00035 } 00036 unsigned int sum() const { 00037 return sum_; 00038 } 00039 int mode() const { 00040 int max = -1; 00041 int mode = -10; 00042 for (int i = 0; i< size_; ++i) { 00043 int val = counter_[i]; 00044 if (val > max) { 00045 max = val; 00046 mode = i; 00047 } 00048 } 00049 return mode; 00050 } 00051 void print() { 00052 for (int i = 0; i < size_; ++i) { 00053 cout << i << ":" << int(counter_[i]) << endl; 00054 } 00055 } 00056 unsigned int num_labels() const { 00057 return size_; 00058 } 00059 unsigned int weight(int i) const { 00060 return counter_[i]; 00061 } 00062 static const double kLog2; 00063 static float entropy_conditioned_naive(const DiscreteDist* sets, 00064 int num_dists) { 00065 float H = 0; 00066 // H(Y |X) = Sum Prob(X=x) H(Y | x = x) 00067 float total = 0; 00068 for (int i = 0; i < num_dists; ++i) { 00069 float split_entropy = 0; 00070 float split_total = 0; 00071 for (int j = 0; j< sets[i].num_labels(); ++j) { 00072 float weight = sets[i].weight(j); 00073 split_entropy -= lnFunc(weight); 00074 split_total += weight; 00075 total += weight; 00076 cerr << j << ":" << weight <<endl; 00077 } 00078 if (split_total == 0) { 00079 split_entropy = 0; 00080 } else { 00081 split_entropy = (split_entropy + lnFunc(split_total) ) / 00082 (split_total *kLog2); 00083 } 00084 cerr << "Split " << i << ":" << split_entropy <<endl 00085 ; 00086 H += split_total * split_entropy; 00087 } 00088 return H / (total); 00089 } 00090 static float entropy_conditioned(const DiscreteDist* sets, int num_dists) { 00091 float returnValue = 0; 00092 float total = 0; 00093 float sumForSet; 00094 00095 for (int i = 0; i < num_dists; ++i ) { 00096 sumForSet = 0; 00097 for (int j = 0; j < sets[i].num_labels(); ++j) { 00098 float weight = sets[i].weight(j); 00099 returnValue += lnFunc(weight); 00100 sumForSet += weight; 00101 } 00102 returnValue -= lnFunc(sumForSet); 00103 total += sumForSet; 00104 } 00105 if (total == 0){ 00106 return 0; 00107 } 00108 returnValue = -returnValue /(total *kLog2); 00109 assert (returnValue == returnValue); 00110 return returnValue; 00111 } 00112 00113 // Adapted from ContingencyTables.java: entropyOverColumns 00114 float entropy_over_classes() const{ 00115 float returnValue = 0; 00116 float total = 0; 00117 for (int i = 0; i < size_; ++i) { 00118 returnValue -= lnFunc(counter_[i]); 00119 total += counter_[i]; 00120 } 00121 if (total == 0) { 00122 return 0; 00123 } 00124 return (returnValue + lnFunc(total)) / (total * kLog2); 00125 } 00126 private: 00127 unsigned int sum_; 00128 unsigned int size_; 00129 static float lnFunc(float num) { 00130 if (num < 1e-6) { 00131 return 0; 00132 } else { 00133 return num * log(num); 00134 } 00135 } 00136 unsigned int* counter_; 00137 }; 00138 } // namespace 00139 #endif

Generated on Mon Jan 8 23:19:06 2007 for librf by doxygen 1.3.7