public abstract class AbstractClassifier
extends java.lang.Object
implements java.io.Serializable
Modifier and Type | Field and Description |
---|---|
static com.ibm.bluej.util.common.MutableDouble | PRESENT |
Constructor and Description |
---|
AbstractClassifier() |
Modifier and Type | Method and Description |
---|---|
static <T> java.util.Map<T,com.ibm.bluej.util.common.MutableDouble> | convertBinaryFeatures(java.lang.Iterable<T> features) Converts a set of binary features into the feature/value map format expected by train |
<T> com.ibm.bluej.util.common.PrecisionRecallThreshold | crossValidate(java.util.List<java.util.Map<T,com.ibm.bluej.util.common.MutableDouble>>[] allVectors) Ten-fold cross validation |
<T> com.ibm.bluej.util.common.PrecisionRecallThreshold | crossValidate(java.util.List<java.util.Map<T,com.ibm.bluej.util.common.MutableDouble>>[] allVectors, int folds, long seed) If a binary classifier, it assumes the "relevant" class for P/R is index 0 |
static <S,M extends java.util.Map<S,com.ibm.bluej.util.common.MutableDouble>> java.util.Collection<M>[] | holdOut(java.util.Collection<M>[] allVectors, com.ibm.bluej.util.common.IdentitySet<M> heldOut) |
abstract int | predict(java.util.Map<?,com.ibm.bluej.util.common.MutableDouble> features) Predict the most likely class label |
abstract double[] | predictProb(java.util.Map<?,com.ibm.bluej.util.common.MutableDouble> features) Predict a probability for each class |
static double | probToWeight(double prob) |
static java.util.Collection<? extends java.util.Map<?,com.ibm.bluej.util.common.MutableDouble>>[] | pruneFeatures(java.util.Collection<? extends java.util.Map<?,com.ibm.bluej.util.common.MutableDouble>>[] trainingData, int minOccurrences) |
static java.util.ArrayList<java.util.Map<java.lang.Integer,com.ibm.bluej.util.common.MutableDouble>>[] | testingData(int numClasses, int numInstances, int numFeatures, int numNoise) Generate some data for testing; if numFeatures and numInstances are pretty high it should be possible to do well on this data |
abstract void | train(java.util.Collection<? extends java.util.Map<?,com.ibm.bluej.util.common.MutableDouble>>[] allVectors) Trains the model using allVectors as training data |
public static <T> java.util.Map<T,com.ibm.bluej.util.common.MutableDouble> convertBinaryFeatures(java.lang.Iterable<T> features)
Converts a set of binary features into the feature/value map format expected by train.
Parameters:
features - binary features
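
A minimal usage sketch, assuming AbstractClassifier is on the classpath; the class name and feature names below are illustrative, and the mapping of each feature to the PRESENT value is an assumption, not documented behavior.

```java
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import com.ibm.bluej.util.common.MutableDouble;

public class ConvertBinaryFeaturesSketch {
    public static void main(String[] args) {
        // A binary feature set: each feature is simply present or absent.
        List<String> binaryFeatures = Arrays.asList("hasDigit", "endsWithPeriod", "inTitle");

        // Convert to the feature/value map format expected by train
        // (presumably each present feature maps to AbstractClassifier.PRESENT).
        Map<String, MutableDouble> featureVector =
                AbstractClassifier.convertBinaryFeatures(binaryFeatures);

        System.out.println(featureVector.size() + " features: " + featureVector.keySet());
    }
}
```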

public static java.util.Collection<? extends java.util.Map<?,com.ibm.bluej.util.common.MutableDouble>>[] pruneFeatures(java.util.Collection<? extends java.util.Map<?,com.ibm.bluej.util.common.MutableDouble>>[] trainingData, int minOccurrences)

public static java.util.ArrayList<java.util.Map<java.lang.Integer,com.ibm.bluej.util.common.MutableDouble>>[] testingData(int numClasses, int numInstances, int numFeatures, int numNoise)
Generate some data for testing; if numFeatures and numInstances are pretty high it should be possible to do well on this data.
Parameters:
numClasses -
numInstances -
numFeatures -
numNoise -
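
A sketch of generating the synthetic data; the parameter meanings in the comments are inferred from the names and the exact data distribution is up to the library.

```java
import java.util.ArrayList;
import java.util.Map;

import com.ibm.bluej.util.common.MutableDouble;

public class TestingDataSketch {
    public static void main(String[] args) {
        // Generate a synthetic 3-class problem with 1000 instances, 50 informative
        // features and 10 noise features (parameter meanings inferred from the names).
        ArrayList<Map<Integer, MutableDouble>>[] data =
                AbstractClassifier.testingData(3, 1000, 50, 10);

        // The array is indexed by class label, one list of feature/value maps per class.
        for (int label = 0; label < data.length; ++label) {
            System.out.println("class " + label + ": " + data[label].size() + " instances");
        }
    }
}
```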

public static double probToWeight(double prob)
public static <S,M extends java.util.Map<S,com.ibm.bluej.util.common.MutableDouble>> java.util.Collection<M>[] holdOut(java.util.Collection<M>[] allVectors, com.ibm.bluej.util.common.IdentitySet<M> heldOut)
public <T> com.ibm.bluej.util.common.PrecisionRecallThreshold crossValidate(java.util.List<java.util.Map<T,com.ibm.bluej.util.common.MutableDouble>>[] allVectors, int folds, long seed)
If a binary classifier, it assumes the "relevant" class for P/R is index 0.
Parameters:
allVectors -
folds -
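
A usage sketch built on the synthetic data from testingData; HypotheticalClassifier is a made-up stand-in for any concrete AbstractClassifier subclass, and PrecisionRecallThreshold's own API is not shown.

```java
import java.util.List;
import java.util.Map;

import com.ibm.bluej.util.common.MutableDouble;
import com.ibm.bluej.util.common.PrecisionRecallThreshold;

public class CrossValidateSketch {
    public static void main(String[] args) {
        // HypotheticalClassifier stands in for any concrete AbstractClassifier subclass.
        AbstractClassifier classifier = new HypotheticalClassifier();

        // Two classes; index 0 is treated as the "relevant" class for precision/recall.
        List<Map<Integer, MutableDouble>>[] allVectors =
                AbstractClassifier.testingData(2, 500, 40, 5);

        // 5 folds with a fixed seed so the split is reproducible;
        // the single-argument overload runs ten-fold cross validation instead.
        PrecisionRecallThreshold result = classifier.crossValidate(allVectors, 5, 42L);
    }
}
```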

public <T> com.ibm.bluej.util.common.PrecisionRecallThreshold crossValidate(java.util.List<java.util.Map<T,com.ibm.bluej.util.common.MutableDouble>>[] allVectors)
Ten-fold cross validation.
Parameters:
allVectors -

public abstract void train(java.util.Collection<? extends java.util.Map<?,com.ibm.bluej.util.common.MutableDouble>>[] allVectors)
Trains the model using allVectors as training data.
Parameters:
allVectors - The indices of the array correspond to the classes to be distinguished. Each class has an Iterable of instances (feature/value maps).
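
A sketch of assembling training data by class index; HypotheticalClassifier is again a made-up concrete subclass and the feature names are illustrative.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;

import com.ibm.bluej.util.common.MutableDouble;

public class TrainSketch {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        // One collection of instances per class; the array index is the class label.
        Collection<Map<String, MutableDouble>>[] allVectors = new Collection[2];
        allVectors[0] = new ArrayList<>(); // class 0, e.g. "relevant"
        allVectors[1] = new ArrayList<>(); // class 1, e.g. "not relevant"

        // Each instance is a feature/value map; binary feature sets can be
        // converted with convertBinaryFeatures (feature names are illustrative).
        allVectors[0].add(AbstractClassifier.convertBinaryFeatures(
                Arrays.asList("hasDigit", "inTitle")));
        allVectors[1].add(AbstractClassifier.convertBinaryFeatures(
                Arrays.asList("allLowercase")));

        // HypotheticalClassifier stands in for any concrete AbstractClassifier subclass.
        AbstractClassifier classifier = new HypotheticalClassifier();
        classifier.train(allVectors);
    }
}
```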

public abstract double[] predictProb(java.util.Map<?,com.ibm.bluej.util.common.MutableDouble> features)
Predict a probability for each class.
Parameters:
features - The features for the instance to be classified

public abstract int predict(java.util.Map<?,com.ibm.bluej.util.common.MutableDouble> features)
Predict the most likely class label.
Parameters:
features - The features for the instance to be classified
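
A closing sketch of classifying a new instance with an already-trained classifier; HypotheticalClassifier is a made-up concrete subclass and the feature names are illustrative.

```java
import java.util.Arrays;
import java.util.Map;

import com.ibm.bluej.util.common.MutableDouble;

public class PredictSketch {
    public static void main(String[] args) {
        // HypotheticalClassifier stands in for an already-trained concrete subclass.
        AbstractClassifier classifier = new HypotheticalClassifier();

        // Feature/value map for the instance to be classified.
        Map<String, MutableDouble> features = AbstractClassifier.convertBinaryFeatures(
                Arrays.asList("hasDigit", "inTitle"));

        int label = classifier.predict(features);          // most likely class label
        double[] probs = classifier.predictProb(features); // one probability per class

        System.out.println("predicted class " + label + " with probability " + probs[label]);
    }
}
```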