|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
edu.stanford.nlp.classify.GeneralDataset
edu.stanford.nlp.classify.Dataset
public class Dataset
An interfacing class for ClassifierFactory that incrementally builds a more memory-efficient representation of a List of Datum objects for the purposes of training a Classifier with a ClassifierFactory.
Field Summary |
---|
Fields inherited from class edu.stanford.nlp.classify.GeneralDataset |
---|
data, featureIndex, labelIndex, labels, size |
Constructor Summary | |
---|---|
Dataset()
|
|
Dataset(Index labelIndex,
int[] labels,
Index featureIndex,
int[][] data)
Constructor that fully specifies a Dataset. |
|
Dataset(Index labelIndex,
int[] labels,
Index featureIndex,
int[][] data,
int size)
Constructor that fully specifies a Dataset. |
|
Dataset(int numDatums)
|
|
Dataset(int numDatums,
Index featureIndex,
Index labelIndex)
|
Method Summary | |
---|---|
void |
add(Collection features,
Object label)
|
void |
add(Datum d)
|
protected void |
addFeatures(Collection features)
|
protected void |
addLabel(Object label)
|
void |
applyFeatureCountThreshold(List<Pair<Pattern,Integer>> thresholds)
Applies feature count thresholds to the Dataset. |
void |
changeFeatureIndex(Index newFeatureIndex)
|
void |
changeLabelIndex(Index newLabelIndex)
|
protected void |
ensureSize()
|
Datum |
getDatum(int index)
|
Counter |
getFeatureCounter()
Gets the number of datums a given feature appears in. |
double[] |
getInformationGains()
|
Dataset |
getRandomSubDataset(double p,
int seed)
|
RVFDatum |
getRVFDatum(int index)
|
double[][] |
getValuesArray()
|
protected void |
initialize(int numDatums)
This method takes care of resetting values of the dataset such that it is empty with an initial capacity of numDatums. It should be accessed only by appropriate methods within the class, such as clear(), which take care of other parts of the emptying of data. |
static void |
main(String[] args)
|
void |
printFullFeatureMatrix(PrintWriter pw)
prints the full feature matrix in tab-delimited form. |
void |
printSparseFeatureMatrix()
Prints the sparse feature matrix using printSparseFeatureMatrix(PrintWriter) to System.out. |
void |
printSparseFeatureMatrix(PrintWriter pw)
prints a sparse feature matrix representation of the Dataset. |
static void |
printSVMLightFormat(PrintWriter pw,
Counter<Integer> c,
int classNo)
Need to sort the counter by feature keys and dump it |
static Dataset |
readSVMLightFormat(String filename)
Constructs a Dataset by reading in a file in SVM light format. |
static Dataset |
readSVMLightFormat(String filename,
Index featureIndex,
Index labelIndex)
Constructs a Dataset by reading in a file in SVM light format. |
static Dataset |
readSVMLightFormat(String filename,
Index featureIndex,
Index labelIndex,
List<String> lines)
Constructs a Dataset by reading in a file in SVM light format. |
static Dataset |
readSVMLightFormat(String filename,
List<String> lines)
Constructs a Dataset by reading in a file in SVM light format. |
void |
selectFeaturesBinaryInformationGain(int numFeatures)
|
Pair<GeneralDataset,GeneralDataset> |
split(double percentDev)
|
Pair<GeneralDataset,GeneralDataset> |
split(int start,
int end)
|
void |
summaryStatistics()
Prints some summary statistics to stderr for the Dataset. |
static Datum |
svmLightLineToDatum(String l)
|
String |
toString()
|
String |
toSummaryStatistics()
|
String |
toSummaryString()
|
Methods inherited from class edu.stanford.nlp.classify.GeneralDataset |
---|
addAll, applyFeatureCountThreshold, clear, clear, featureIndex, getDataArray, getFeatureCounts, getLabelsArray, labelIndex, labelIterator, numClasses, numFeatures, numFeatureTokens, numFeatureTypes, printSVMLightFormat, printSVMLightFormat, size, trimData, trimLabels, trimToSize, trimToSize, trimToSize |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Constructor Detail |
---|
public Dataset()
public Dataset(int numDatums, Index featureIndex, Index labelIndex)
public Dataset(int numDatums)
public Dataset(Index labelIndex, int[] labels, Index featureIndex, int[][] data)
public Dataset(Index labelIndex, int[] labels, Index featureIndex, int[][] data, int size)
Method Detail |
---|
public Pair<GeneralDataset,GeneralDataset> split(double percentDev)
split
in class GeneralDataset
public Pair<GeneralDataset,GeneralDataset> split(int start, int end)
split
in class GeneralDataset
public Dataset getRandomSubDataset(double p, int seed)
public double[][] getValuesArray()
getValuesArray
in class GeneralDataset
public static Dataset readSVMLightFormat(String filename)
public static Dataset readSVMLightFormat(String filename, List<String> lines)
public static Dataset readSVMLightFormat(String filename, Index featureIndex, Index labelIndex)
public static Dataset readSVMLightFormat(String filename, Index featureIndex, Index labelIndex, List<String> lines)
public static Datum svmLightLineToDatum(String l)
public Counter getFeatureCounter()
public void add(Datum d)
add
in class GeneralDataset
public void add(Collection features, Object label)
protected void ensureSize()
protected void addLabel(Object label)
protected void addFeatures(Collection features)
protected void initialize(int numDatums)
GeneralDataset
initialize
in class GeneralDataset
numDatums
- initial capacity of dataset
public Datum getDatum(int index)
index
-
public RVFDatum getRVFDatum(int index)
getRVFDatum
in class GeneralDataset
index
-
public void summaryStatistics()
summaryStatistics
in class GeneralDataset
public String toSummaryStatistics()
public void applyFeatureCountThreshold(List<Pair<Pattern,Integer>> thresholds)
thresholds
- a list of pattern, threshold pairs
public void printFullFeatureMatrix(PrintWriter pw)
public void printSparseFeatureMatrix()
Prints the sparse feature matrix using printSparseFeatureMatrix(PrintWriter) to System.out.
public void printSparseFeatureMatrix(PrintWriter pw)
Prints a sparse feature matrix representation of the Dataset, using the Object.toString() representations of features.
public static void main(String[] args)
public void changeLabelIndex(Index newLabelIndex)
public void changeFeatureIndex(Index newFeatureIndex)
public void selectFeaturesBinaryInformationGain(int numFeatures)
public double[] getInformationGains()
public String toString()
toString
in class Object
public String toSummaryString()
public static void printSVMLightFormat(PrintWriter pw, Counter<Integer> c, int classNo)
pw -
c -
classNo -
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |