00001
00008
#ifndef _INSTANCE_SET_H_
00009
#define _INSTANCE_SET_H_
00010
00011
#include <string>
00012
#include <vector>
00013
#include "librf/instance.h"
00014
#include "librf/discrete_dist.h"
00015
00016
using namespace std;
00017
00018
namespace librf {
00019
00020
class weight_list;
00025 class InstanceSet {
00026
public:
00028
InstanceSet();
00030
static InstanceSet*
create_subset(
const InstanceSet&,
const weight_list&);
00032
static InstanceSet*
load_csv_and_labels(
const string& data,
00033
const string& labels,
00034
bool header =
false,
00035
const string& delim =
",");
00037
static InstanceSet*
load_libsvm(
const string& data,
00038
int num_features);
00040
void save_var(
int var, vector<float> *target);
00042
void load_var(
int var,
const vector<float>&);
00044
void permute(
int var,
unsigned int * seed);
00046
void create_sorted_indices();
00048 const vector<int>&
get_sorted_indices(
int attribute)
const{
00049
return sorted_indices_[attribute];
00050 }
00052 int mode_label()
const {
00053
return distribution_.mode();
00054 }
00056 unsigned char label(
int i)
const{
00057
return labels_[i];
00058 }
00060 unsigned int size()
const {
00061
return labels_.size();
00062 }
00064 unsigned int num_attributes()
const {
00065
return attributes_.size();
00066 }
00068 float get_attribute(
int i,
int attr)
const {
00069
return attributes_[attr][i];
00070 }
00072
00073 string
get_varname(
int i)
const {
00074
return var_names_[i];
00075 }
00076
00077
00078
00079
private:
00081
InstanceSet(
const string& csv_data,
const string& labels,
00082
bool header=
false,
const string& delim=
",");
00084
InstanceSet(
const string& filename,
int num);
00086
InstanceSet(
const InstanceSet&,
const weight_list&);
00087
void load_labels(istream& in);
00088
void load_csv(istream& in,
bool header,
const string& delim);
00089
void load_svm(istream& in);
00090
void create_dummy_var_names(
int n);
00091
void sort_attribute(
const vector<float>&attribute, vector<int>*indices);
00092 DiscreteDist distribution_;
00093 vector<Instance> instances_;
00094
00095
00096 vector< vector<float> > attributes_;
00097
00098
00099 vector<unsigned char> labels_;
00100 vector<string> var_names_;
00101 vector< vector<int> > sorted_indices_;
00102 };
00103
00104 }
00105
#endif