Main Page | Class List | File List | Class Members

instance_set.h

Go to the documentation of this file.
00001 00008 #ifndef _INSTANCE_SET_H_ 00009 #define _INSTANCE_SET_H_ 00010 00011 #include <string> 00012 #include <vector> 00013 #include "librf/instance.h" 00014 #include "librf/discrete_dist.h" 00015 00016 using namespace std; 00017 00018 namespace librf { 00019 00020 class weight_list; 00025 class InstanceSet { 00026 public: 00028 InstanceSet(); 00030 static InstanceSet* create_subset(const InstanceSet&, const weight_list&); 00032 static InstanceSet* load_csv_and_labels(const string& data, 00033 const string& labels, 00034 bool header = false, 00035 const string& delim =","); 00037 static InstanceSet* load_libsvm(const string& data, 00038 int num_features); 00040 void save_var(int var, vector<float> *target); 00042 void load_var(int var, const vector<float>&); 00044 void permute(int var, unsigned int * seed); 00046 void create_sorted_indices(); 00048 const vector<int>& get_sorted_indices(int attribute) const{ 00049 return sorted_indices_[attribute]; 00050 } 00052 int mode_label() const { 00053 return distribution_.mode(); 00054 } 00056 unsigned char label(int i) const{ 00057 return labels_[i]; 00058 } 00060 unsigned int size() const { 00061 return labels_.size(); 00062 } 00064 unsigned int num_attributes() const { 00065 return attributes_.size(); 00066 } 00068 float get_attribute(int i, int attr) const { 00069 return attributes_[attr][i]; 00070 } 00072 //names) 00073 string get_varname(int i) const { 00074 return var_names_[i]; 00075 } 00076 //float class_entropy() const{ 00077 // return distribution_.entropy_over_classes(); 00078 //} 00079 private: 00081 InstanceSet(const string& csv_data, const string& labels, 00082 bool header=false, const string& delim=","); 00084 InstanceSet(const string& filename, int num); 00086 InstanceSet(const InstanceSet&, const weight_list&); 00087 void load_labels(istream& in); 00088 void load_csv(istream& in, bool header, const string& delim); 00089 void load_svm(istream& in); 00090 void create_dummy_var_names(int n); 00091 void sort_attribute(const vector<float>&attribute, vector<int>*indices); 00092 DiscreteDist distribution_; 00093 vector<Instance> instances_; 00094 // List of Attribute Lists 00095 // Thus access is attributes_ [attribute] [ instance] 00096 vector< vector<float> > attributes_; 00097 // List of true labels 00098 // access is labels_ [instance] 00099 vector<unsigned char> labels_; 00100 vector<string> var_names_; 00101 vector< vector<int> > sorted_indices_; 00102 }; 00103 00104 } // namespace 00105 #endif

Generated on Mon Jan 8 23:19:06 2007 for librf by doxygen 1.3.7