discrete_dist.h
00001
00002
00003
00004 #ifndef _DISCRETE_DIST_H_
00005 #define _DISCRETE_DIST_H_
00006
#include <math.h>

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

#include "librf/types.h"
00011
00012 using namespace std;
00013
00014 namespace librf {
00015
/// Histogram of unsigned integer weights over a fixed set of class labels
/// 0 .. num_labels()-1, plus helpers that compute entropies from such
/// histograms.
///
/// The counters live in a std::vector, so copying or assigning a
/// DiscreteDist gives the copy its own independent counters.
class DiscreteDist {
 public:
  /// Creates a distribution over `size` labels, every label starting at
  /// weight zero. A non-positive `size` produces a distribution without
  /// labels.
  DiscreteDist(int size = 2)
      : sum_(0),
        counter_(size > 0 ? static_cast<std::size_t>(size) : std::size_t(0),
                 0u) {}

  /// Adds `weight` to the count of label `value` and to the running total.
  /// `value` must lie in [0, num_labels()).
  void add(int value, unsigned int weight = 1) {
    counter_[checked_index(value)] += weight;
    sum_ += weight;
  }

  /// Subtracts `weight` from the count of label `value` and from the
  /// running total. `value` must lie in [0, num_labels()). The counters are
  /// unsigned, so removing more than was added wraps around.
  void remove(int value, unsigned int weight = 1) {
    counter_[checked_index(value)] -= weight;
    sum_ -= weight;
  }

  /// Returns the running total maintained by add() and remove().
  unsigned int sum() const {
    return sum_;
  }

  /// Returns the label with the largest weight; ties go to the lowest label.
  /// Returns -10 when the distribution has no labels.
  int mode() const {
    int best_label = -10;  // sentinel reported for an empty distribution
    unsigned int best_weight = 0;
    for (std::size_t label = 0; label < counter_.size(); ++label) {
      // The first label always becomes the initial candidate; afterwards
      // only a strictly larger weight replaces it. Comparing as unsigned
      // keeps counts above INT_MAX ordered correctly.
      if (label == 0 || counter_[label] > best_weight) {
        best_weight = counter_[label];
        best_label = static_cast<int>(label);
      }
    }
    return best_label;
  }

  /// Writes one "label:weight" line per label to standard output.
  void print() const {
    for (std::size_t label = 0; label < counter_.size(); ++label) {
      std::cout << label << ":" << static_cast<int>(counter_[label])
                << std::endl;
    }
  }

  /// Returns the number of labels this distribution was created with.
  unsigned int num_labels() const {
    return static_cast<unsigned int>(counter_.size());
  }

  /// Returns the weight currently recorded for label `i`, which must lie in
  /// [0, num_labels()).
  unsigned int weight(int i) const {
    return counter_[checked_index(i)];
  }

  /// Divisor that converts natural-log entropies to the reported unit.
  /// Defined outside this header; presumably ln(2), which would make the
  /// entropies below come out in bits -- confirm against the definition.
  static const double kLog2;

  /// Straightforward reference computation of the conditional entropy of
  /// the `num_dists` distributions in `sets`: each distribution's entropy is
  /// computed on its own and the results are averaged, weighted by each
  /// distribution's total weight.
  ///
  /// Returns 0 when the combined weight is zero (matching
  /// entropy_conditioned()) instead of dividing by zero.
  ///
  /// NOTE(review): this routine writes every weight and every per-split
  /// entropy to standard error; that diagnostic output is kept unchanged.
  static float entropy_conditioned_naive(const DiscreteDist* sets,
                                         int num_dists) {
    float H = 0;
    float total = 0;
    for (int i = 0; i < num_dists; ++i) {
      float split_entropy = 0;
      float split_total = 0;
      const unsigned int label_count = sets[i].num_labels();
      for (unsigned int j = 0; j < label_count; ++j) {
        const float weight = sets[i].weight(static_cast<int>(j));
        split_entropy -= lnFunc(weight);
        split_total += weight;
        total += weight;
        std::cerr << j << ":" << weight << std::endl;
      }
      if (split_total == 0) {
        split_entropy = 0;  // an empty split contributes no entropy
      } else {
        split_entropy = (split_entropy + lnFunc(split_total)) /
                        (split_total * kLog2);
      }
      std::cerr << "Split " << i << ":" << split_entropy << std::endl;
      H += split_total * split_entropy;
    }
    if (total == 0) {
      return 0;  // nothing observed: avoid 0/0 producing NaN
    }
    return H / total;
  }

  /// Conditional entropy of the `num_dists` distributions in `sets`,
  /// accumulated in a single pass as
  ///   -(sum_ij n_ij*ln(n_ij) - sum_i N_i*ln(N_i)) / (N * kLog2)
  /// where n_ij is the weight of label j in set i, N_i the total weight of
  /// set i and N the combined weight. Returns 0 when N is zero.
  static float entropy_conditioned(const DiscreteDist* sets, int num_dists) {
    float accumulated = 0;
    float total = 0;
    for (int i = 0; i < num_dists; ++i) {
      float set_total = 0;
      const unsigned int label_count = sets[i].num_labels();
      for (unsigned int j = 0; j < label_count; ++j) {
        const float weight = sets[i].weight(static_cast<int>(j));
        accumulated += lnFunc(weight);
        set_total += weight;
      }
      accumulated -= lnFunc(set_total);
      total += set_total;
    }
    if (total == 0) {
      return 0;
    }
    const float result = -accumulated / (total * kLog2);
    assert(result == result);  // self-comparison fails only for NaN
    return result;
  }

  /// Entropy of this distribution over its labels:
  ///   (N*ln(N) - sum_i n_i*ln(n_i)) / (N * kLog2)
  /// where n_i is the weight of label i and N the sum of all weights.
  /// Returns 0 when N is zero.
  float entropy_over_classes() const {
    float neg_sum = 0;
    float total = 0;
    for (std::size_t label = 0; label < counter_.size(); ++label) {
      neg_sum -= lnFunc(counter_[label]);
      total += counter_[label];
    }
    if (total == 0) {
      return 0;
    }
    return (neg_sum + lnFunc(total)) / (total * kLog2);
  }

 private:
  /// Converts a caller-supplied label to a vector index, asserting in debug
  /// builds that it addresses an existing label.
  std::size_t checked_index(int value) const {
    assert(value >= 0 && static_cast<std::size_t>(value) < counter_.size());
    return static_cast<std::size_t>(value);
  }

  /// Returns num * ln(num), treating anything below 1e-6 as zero so that
  /// empty counts contribute nothing instead of evaluating log(0).
  static float lnFunc(float num) {
    if (num < 1e-6) {
      return 0;
    }
    return num * log(num);
  }

  unsigned int sum_;                   // running total of all weights
  std::vector<unsigned int> counter_;  // per-label weights, indexed by label
};
00138 }
00139 #endif
Generated on Mon Jan 8 23:19:06 2007 for librf by Doxygen 1.3.7