com.sjm.machlearn.dataset
Class DataSet
java.lang.Object
  |
  +--com.sjm.machlearn.util.MainClass
        |
        +--com.sjm.machlearn.dataset.DataSet
- public class DataSet
- extends MainClass
DataSet.java: a class that holds a whole
data set of examples together.
Methods inherited from class com.sjm.machlearn.util.MainClass |
_internalError,
_internalError,
addDebugListener,
debugMesg,
debugMesg,
debugMesg,
debugMesg,
debugMesg,
debugMesg,
debugOff,
debugOn,
internalError,
internalError,
setDebug,
setDebugLevel |
Methods inherited from class java.lang.Object |
clone,
equals,
finalize,
getClass,
hashCode,
notify,
notifyAll,
toString,
wait,
wait,
wait |
idlist
protected FeatureIdList idlist
data
protected Example[] data
weights
protected double[] weights
DataSet
public DataSet(DataSet ds)
DataSet
public DataSet(Example[] ex)
DataSet
public DataSet(FeatureIdList fid)
DataSet
public DataSet(Feature[][] featurematrix)
throws InvalidFeature,
NoOutputException,
MultipleOutputException
DataSet
public DataSet(FeatureIdList idl,
Example[] dt)
DataSet
public DataSet(java.lang.String namesfile,
java.lang.String datafile)
throws java.lang.Exception
copyWeights
public void copyWeights(double[] wgts)
copyWeights
public void copyWeights(DataSet ds)
initializeWeights
public void initializeWeights()
getIdList
public FeatureIdList getIdList()
getFeatureId
public FeatureId getFeatureId(int index)
getOutputValueIds
public int[] getOutputValueIds()
hasFeature
public boolean hasFeature(FeatureId featureid)
getFeatureIndex
public int getFeatureIndex(FeatureId featureid)
throws MissingFeatureException
get
public Example get(int index)
getOutputIndex
public int getOutputIndex()
getWeight
public double getWeight(int index)
setWeight
public void setWeight(int index,
double val)
getTotalWeights
public double getTotalWeights()
normalizeWeights
public void normalizeWeights()
sameOutputValue
public boolean sameOutputValue()
getMajorityOutputIndex
public int getMajorityOutputIndex()
getMajorityOutputIndexWgt
public int getMajorityOutputIndexWgt()
getOutputFeature
public Feature getOutputFeature()
getOutputFeatureId
public FeatureId getOutputFeatureId()
getOutputFeature
public Feature getOutputFeature(int example_index)
getOutputCounts
public int[] getOutputCounts()
getOutputCountsWgt
public double[] getOutputCountsWgt()
getEntropyWgt
public double getEntropyWgt()
getEntropy
public double getEntropy()
size
public int size()
numFeatures
public int numFeatures()
countOutput
public int countOutput(java.lang.String value)
throws InvalidFeature
writeNames
public void writeNames(java.lang.String namesfile)
throws java.lang.Exception
writeData
public void writeData(java.lang.String datafile)
throws java.lang.Exception
write
public void write(java.lang.String namesfile,
java.lang.String datafile)
throws java.lang.Exception
readData
protected void readData(java.lang.String datafile)
throws java.lang.Exception
merge
public void merge(DataSet newSet)
splitRandom
public DataSet[] splitRandom(double testpercent)
- splitRandom(): splits the
data into two subsets, based
upon testpercent.
Examples are chosen randomly
from this dataset.
DataSet[0] = the smaller part of the dataset.
DataSet[1] = the rest of the dataset.
splitJackKnife
public DataSet[] splitJackKnife(int index)
splitDataSetFolds
public DataSet[][] splitDataSetFolds(int numfolds,
boolean randomize)
- splitDataSetFolds(): splits the dataset
into an array of n folds.
The 1st index is the fold number.
The 2nd index selects the subset:
0 = training set
1 = test set
splitDataSet
public DataSet[] splitDataSet(int numSets,
boolean randomize)
getBootStrapReplicate
public DataSet[] getBootStrapReplicate()
createMapping
public int[] createMapping(boolean randomize)
printFeatureIdList
public java.lang.String printFeatureIdList()