Information describing a dataset and its features. More...
Public Member Functions | |
const DatasetFeature & | feature (index_t attrib_num) const |
Gets information about a particular feature. | |
const DatasetFeature & | feature (index_t attrib_num) const |
Gets information about a particular feature. | |
ArrayList< DatasetFeature > & | features () |
Gets a mutable list of all features. | |
ArrayList< DatasetFeature > & | features () |
Gets a mutable list of all features. | |
void | Init (const char *name_in="dataset") |
Initialize a custom dataset. | |
void | Init (const char *name_in="dataset") |
Initialize a custom dataset. | |
void | InitContinuous (index_t n_features, const char *name_in="dataset") |
Initialize an all-continuous dataset. | |
void | InitContinuous (index_t n_features, const char *name_in="dataset") |
Initialize an all-continuous dataset. | |
success_t | InitFromArff (TextLineReader *reader, const char *filename="dataset") |
Initialize explicitly from an ARFF file. | |
success_t | InitFromArff (TextLineReader *reader, const char *filename="dataset") |
Initialize explicitly from an ARFF file. | |
success_t | InitFromCsv (TextLineReader *reader, const char *filename="dataset") |
Initialize from a CSV-like file containing only numbers; if the first row has non-numeric characters, it is automatically treated as a header. | |
success_t | InitFromCsv (TextLineReader *reader, const char *filename="dataset") |
Initialize from a CSV-like file containing only numbers; if the first row has non-numeric characters, it is automatically treated as a header. | |
success_t | InitFromFile (TextLineReader *reader, const char *filename="dataset") |
Initializes the header from a file, either CSV or ARFF. | |
success_t | InitFromFile (TextLineReader *reader, const char *filename="dataset") |
Initializes the header from a file, either CSV or ARFF. | |
bool | is_all_continuous () const |
Checks if all features are continuous. | |
bool | is_all_continuous () const |
Checks if all features are continuous. | |
index_t | n_features () const |
Gets the number of features. | |
index_t | n_features () const |
Gets the number of features. | |
const char * | name () const |
Gets the title of the data set. | |
const char * | name () const |
Gets the title of the data set. | |
success_t | ReadMatrix (TextLineReader *reader, Matrix *matrix) const |
Populates a matrix from a file, given the internal data model. | |
success_t | ReadMatrix (TextLineReader *reader, Matrix *matrix) const |
Populates a matrix from a file, given the internal data model. | |
success_t | ReadPoint (TextLineReader *reader, double *point, bool *is_done) const |
Reads a single vector. | |
success_t | ReadPoint (TextLineReader *reader, double *point, bool *is_done) const |
Reads a single vector. | |
void | set_name (const char *name_in) |
Sets the title of the data set. | |
void | set_name (const char *name_in) |
Sets the title of the data set. | |
void | WriteArffHeader (TextWriter *writer) const |
Writes the header for an ARFF file. | |
void | WriteArffHeader (TextWriter *writer) const |
Writes the header for an ARFF file. | |
void | WriteCsvHeader (const char *sep, TextWriter *writer) const |
Writes header for CSV file. | |
void | WriteCsvHeader (const char *sep, TextWriter *writer) const |
Writes header for CSV file. | |
void | WriteMatrix (const Matrix &matrix, const char *sep, TextWriter *writer) const |
Writes the contents of a matrix to a file. | |
void | WriteMatrix (const Matrix &matrix, const char *sep, TextWriter *writer) const |
Writes the contents of a matrix to a file. |
Information describing a dataset and its features.
Definition at line 219 of file dataset.h.
const DatasetFeature& DatasetInfo::feature | ( | index_t | attrib_num | ) | const [inline] |
const DatasetFeature& DatasetInfo::feature | ( | index_t | attrib_num | ) | const [inline] |
Gets information about a particular feature.
Definition at line 236 of file dataset.h.
Referenced by Dataset::Format(), SimpleCrossValidator< TClassifier >::Init(), InitFromArff(), and WriteArffHeader().
ArrayList<DatasetFeature>& DatasetInfo::features | ( | ) | [inline] |
ArrayList<DatasetFeature>& DatasetInfo::features | ( | ) | [inline] |
void DatasetInfo::Init | ( | const char * | name_in = "dataset" |
) |
Initialize a custom dataset.
This assumes you will eventually use features() to add features.
name_in | the dataset title |
void DatasetInfo::Init | ( | const char * | name_in = "dataset" |
) |
Initialize a custom dataset.
This assumes you will eventually use features() to add features.
name_in | the dataset title |
Definition at line 127 of file dataset.cc.
References String::Copy(), and ArrayList< TElem >::Init().
Referenced by InitFromArff(), InitFromCsv(), Dataset::InitFromFile(), and InitFromFile().
void DatasetInfo::InitContinuous | ( | index_t | n_features, | |
const char * | name_in = "dataset" | |||
) |
Initialize an all-continuous dataset.
n_features | the number of continuous features | |
name_in | the dataset title |
void DatasetInfo::InitContinuous | ( | index_t | n_features, | |
const char * | name_in = "dataset" | |||
) |
Initialize an all-continuous dataset.
n_features | the number of continuous features | |
name_in | the dataset title |
Definition at line 114 of file dataset.cc.
References String::Copy(), ArrayList< TElem >::Init(), and String::InitSprintf().
Referenced by Dataset::AliasMatrix(), Dataset::CopyMatrix(), and Dataset::OwnMatrix().
success_t DatasetInfo::InitFromArff | ( | TextLineReader * | reader, | |
const char * | filename = "dataset" | |||
) |
Initialize explicitly from an ARFF file.
ARFF LIMITATIONS: Values cannot have spaces or commas, even with quotes; 'string' data type not supported (nominal is supported).
You might just use InitFromFile, which will guess the type for you.
This will read only the header information and leave the reader at the first line of data.
success_t DatasetInfo::InitFromArff | ( | TextLineReader * | reader, | |
const char * | filename = "dataset" | |||
) |
Initialize explicitly from an ARFF file.
ARFF LIMITATIONS: Values cannot have spaces or commas, even with quotes; 'string' data type not supported (nominal is supported).
You might just use InitFromFile, which will guess the type for you.
This will read only the header information and leave the reader at the first line of data.
Definition at line 161 of file dataset.cc.
References String::EqualsNoCase(), TextLineReader::Error(), feature(), TextLineReader::Gobble(), ArrayList< TElem >::Init(), Init(), DatasetFeature::InitNominal(), TextLineReader::Peek(), ArrayList< TElem >::PushBack(), set_name(), ArrayList< TElem >::size(), String::Split(), and DatasetFeature::value_names().
Referenced by InitFromFile().
success_t DatasetInfo::InitFromCsv | ( | TextLineReader * | reader, | |
const char * | filename = "dataset" | |||
) |
Initialize from a CSV-like file containing only numbers; if the first row has non-numeric characters, it is automatically treated as a header.
InitFromFile will automatically detect this.
success_t DatasetInfo::InitFromCsv | ( | TextLineReader * | reader, | |
const char * | filename = "dataset" | |||
) |
Initialize from a CSV-like file containing only numbers; if the first row has non-numeric characters, it is automatically treated as a header.
InitFromFile will automatically detect this.
Definition at line 233 of file dataset.cc.
References TextLineReader::Error(), TextLineReader::Gobble(), ArrayList< TElem >::Init(), Init(), String::InitSprintf(), name(), TextLineReader::Peek(), ArrayList< TElem >::PushBack(), ArrayList< TElem >::size(), and String::Split().
Referenced by InitFromFile().
success_t DatasetInfo::InitFromFile | ( | TextLineReader * | reader, | |
const char * | filename = "dataset" | |||
) |
Initializes the header from a file, either CSV or ARFF.
All header lines will be gobbled, so the reader's position will be left at the first line of actual data. You can then read the data into a matrix with ReadMatrix().
success_t DatasetInfo::InitFromFile | ( | TextLineReader * | reader, | |
const char * | filename = "dataset" | |||
) |
Initializes the header from a file, either CSV or ARFF.
All header lines will be gobbled, so the reader's position will be left at the first line of actual data. You can then read the data into a matrix with ReadMatrix().
Definition at line 279 of file dataset.cc.
References String::begin(), TextLineReader::Error(), Init(), InitFromArff(), InitFromCsv(), and TextLineReader::Peek().
Referenced by Dataset::InitFromFile().
bool DatasetInfo::is_all_continuous | ( | ) | const |
Checks if all features are continuous.
bool DatasetInfo::is_all_continuous | ( | ) | const |
Checks if all features are continuous.
Definition at line 391 of file dataset.cc.
References DatasetFeature::CONTINUOUS, and ArrayList< TElem >::size().
index_t DatasetInfo::n_features | ( | ) | const [inline] |
Gets the number of features.
Definition at line 241 of file dataset.h.
References ArrayList< TElem >::size().
index_t DatasetInfo::n_features | ( | ) | const [inline] |
Gets the number of features.
Definition at line 241 of file dataset.h.
References ArrayList< TElem >::size().
Referenced by ReadMatrix(), and ReadPoint().
const char* DatasetInfo::name | ( | ) | const [inline] |
const char* DatasetInfo::name | ( | ) | const [inline] |
Gets the title of the data set.
Definition at line 246 of file dataset.h.
Referenced by InitFromCsv(), and WriteCsvHeader().
success_t DatasetInfo::ReadMatrix | ( | TextLineReader * | reader, | |
Matrix * | matrix | |||
) | const |
Populates a matrix from a file, given the internal data model.
ARFF LIMITATIONS: Values cannot have spaces or commas, even with quotes; 'string' data type not supported (nominal is supported).
reader | the reader to get lines from | |
matrix | the matrix to store the data read from the file into |
success_t DatasetInfo::ReadMatrix | ( | TextLineReader * | reader, | |
Matrix * | matrix | |||
) | const |
Populates a matrix from a file, given the internal data model.
ARFF LIMITATIONS: Values cannot have spaces or commas, even with quotes; 'string' data type not supported (nominal is supported).
reader | the reader to get lines from | |
matrix | the matrix to store the data read from the file into |
Definition at line 401 of file dataset.cc.
References ArrayList< TElem >::Init(), n_features(), ArrayList< TElem >::PushBackRaw(), ReadPoint(), ArrayList< TElem >::ReleasePtr(), ArrayList< TElem >::Resize(), ArrayList< TElem >::size(), and ArrayList< TElem >::Trim().
Referenced by Dataset::InitFromFile().
success_t DatasetInfo::ReadPoint | ( | TextLineReader * | reader, | |
double * | point, | |||
bool * | is_done | |||
) | const |
Reads a single vector.
reader | the line reader being used | |
point | an array of length n_features() | |
is_done | set to true if we have finished reading the file successfully -- the value of is_done is undefined if the function returns failure! |
success_t DatasetInfo::ReadPoint | ( | TextLineReader * | reader, | |
double * | point, | |||
bool * | is_done | |||
) | const |
Reads a single vector.
reader | the line reader being used | |
point | an array of length n_features() | |
is_done | set to true if we have finished reading the file successfully -- the value of is_done is undefined if the function returns failure! |
Definition at line 431 of file dataset.cc.
References String::begin(), String::c_str(), String::Copy(), String::end(), TextLineReader::Error(), TextLineReader::Gobble(), TextLineReader::MoreLines(), n_features(), and TextLineReader::Peek().
Referenced by ReadMatrix().
void DatasetInfo::set_name | ( | const char * | name_in | ) | [inline] |
Sets the title of the data set.
Definition at line 251 of file dataset.h.
References String::Copy(), and String::Destruct().
void DatasetInfo::set_name | ( | const char * | name_in | ) | [inline] |
Sets the title of the data set.
Definition at line 251 of file dataset.h.
References String::Copy(), and String::Destruct().
Referenced by InitFromArff().
void DatasetInfo::WriteArffHeader | ( | TextWriter * | writer | ) | const |
Writes the header for an ARFF file.
void DatasetInfo::WriteArffHeader | ( | TextWriter * | writer | ) | const |
Writes the header for an ARFF file.
Definition at line 526 of file dataset.cc.
References String::c_str(), feature(), DatasetFeature::n_values(), DatasetFeature::name(), DatasetFeature::NOMINAL, ArrayList< TElem >::size(), DatasetFeature::type(), and DatasetFeature::value_name().
Referenced by Dataset::WriteArff().
void DatasetInfo::WriteCsvHeader | ( | const char * | sep, | |
TextWriter * | writer | |||
) | const |
Writes header for CSV file.
sep | the value separator (use ",\t" for CSV) | |
writer | the text writer to write the header line to |
void DatasetInfo::WriteCsvHeader | ( | const char * | sep, | |
TextWriter * | writer | |||
) | const |
Writes header for CSV file.
sep | the value separator (use ",\t" for CSV) | |
writer | the text writer to write the header line to |
Definition at line 549 of file dataset.cc.
References name(), and ArrayList< TElem >::size().
Referenced by Dataset::WriteCsv().
void DatasetInfo::WriteMatrix | ( | const Matrix & | matrix, | |
const char * | sep, | |||
TextWriter * | writer | |||
) | const |
Writes the contents of a matrix to a file.
matrix | the matrix | |
sep | the separator (use ",\t" for CSV) | |
writer | the writer to write to |
void DatasetInfo::WriteMatrix | ( | const Matrix & | matrix, | |
const char * | sep, | |||
TextWriter * | writer | |||
) | const |
Writes the contents of a matrix to a file.
matrix | the matrix | |
sep | the separator (use ",\t" for CSV) | |
writer | the writer to write to |
Definition at line 559 of file dataset.cc.
References ArrayList< TElem >::size().
Referenced by Dataset::WriteArff(), and Dataset::WriteCsv().