00001 /* weights.h 00002 * Instance Weights: 00003 * 00004 * this is trying to be a memory "smart" container that will decide 00005 * whether or not to use sparse/dense representation yet 00006 * still provide a uniform interface. 00007 * 00008 * It is possible to do this because - we know how many training 00009 * instances there are apriori - and each time we split a node 00010 * we will know how many instances are going to the left/right 00011 * subtrees 00012 * 00013 * approx mem usage: 00014 * sparse: density * 5 bytes + map overhead 00015 * dense: num_instances bytes 00016 */ 00017 #ifndef _WEIGHTS_H_ 00018 #define _WEIGHTS_H_ 00019 00020 #include <vector> 00021 #include <iostream> 00022 #include "librf/types.h" 00023 00024 using namespace std; 00025 00026 namespace librf { 00027 00028 /* 00029 using ::__gnu_cxx::hash_map; 00030 class weight_interface { 00031 public: 00032 weight_interface(int max_size) { 00033 sum_ = 0; 00034 } 00035 unsigned int sum() { 00036 return sum_; 00037 } 00038 void add_instance(int instance, byte num =1) { 00039 sum_ += num; 00040 add_instance_impl(instance, num); 00041 } 00042 byte get_weight(int instance) const{ 00043 return get_weight_impl(instance); 00044 } 00045 private: 00046 unsigned int sum_; 00047 virtual void add_instance_impl(int instance, byte num) = 0; 00048 virtual byte get_weight_impl(int instance) const = 0; 00049 }; 00050 00051 00052 // need a smart iterator 00053 class sparse_weight : public weight_interface { 00054 public: 00055 sparse_weight(int size) : weight_interface(size) { 00056 } 00057 private: 00058 virtual void add_instance_impl(int i, byte num) { 00059 byte_map::iterator it = store_.find(i); 00060 if (it != store_.end()) { 00061 it->second += num; 00062 } else { 00063 store_[i] = num; 00064 } 00065 } 00066 virtual byte get_weight_impl(int i) const { 00067 byte_map::const_iterator it = store_.find(i); 00068 if (it != store_.end()) { 00069 return it->second; 00070 } 00071 return 0; 00072 } 00073 typedef google::dense_hash_map<int, byte> byte_map; 00074 byte_map store_; 00075 }; 00076 00077 class dense_weight : public weight_interface { 00078 public: 00079 dense_weight(int max_size) : weight_interface(max_size), 00080 store_(max_size,0) { 00081 } 00082 private: 00083 // virtual init_impl(int max_size) { 00084 // store_.resize(max_size, 0); 00085 // } 00086 virtual void add_instance_impl(int i, byte num) { 00087 // cout << int(this) <<" dense add " << i << "," << int(num) << endl; 00088 store_[i] += num; 00089 } 00090 virtual byte get_weight_impl(int i) const { 00091 return store_[i]; 00092 } 00093 vector<byte> store_; 00094 }; 00095 00096 /* 00097 class weight_list { 00098 public: 00099 weight_list(int n, int density) : num_instances_(n){ 00100 int sparse_cost = density * 5; 00101 if (sparse_cost < num_instances_) { 00102 // cout << "creating sparse" << endl; 00103 impl = new sparse_weight(num_instances_); 00104 } else { 00105 // cout << "creating dense " << density << "/" << num_instances_ << endl; 00106 impl = new dense_weight(num_instances_); 00107 } 00108 } 00109 byte operator[](int i) const{ 00110 return impl->get_weight(i); 00111 } 00112 void add(int i, byte num = 1) { 00113 impl->add_instance(i, num); 00114 } 00115 int size() const { 00116 return num_instances_; 00117 } 00118 int sum() const { 00119 return impl->sum(); 00120 } 00121 ~weight_list() { 00122 delete impl; 00123 } 00124 private: 00125 int num_instances_; 00126 weight_interface *impl; 00127 };*/ 00128 00129 class weight_list { 00130 public: 00131 weight_list(int n, int density) : num_instances_(n), sum_(0){ 00132 array_ = new byte[n]; 00133 for(int i =0; i < n; ++i) { 00134 array_[i] = 0; 00135 } 00136 } 00137 byte operator[](int i) const { 00138 return array_[i]; 00139 } 00140 void add(int i, byte num =1){ 00141 array_[i]+=num; 00142 sum_+=num; 00143 } 00144 int size() const { return num_instances_;} 00145 int sum() const { 00146 return sum_; 00147 } 00148 ~weight_list(){ 00149 delete[] array_; 00150 } 00151 private: 00152 int sum_; 00153 int num_instances_; 00154 byte* array_; 00155 }; 00156 00157 } // namespace 00158 #endif