public class SparseVectors
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
SparseVectors.OverlapRecord<T>
for a key, stores the two values from two sparse vectors.
|
Constructor and Description |
---|
SparseVectors() |
Modifier and Type | Method and Description |
---|---|
static <T> void |
addTo(java.util.Map<T,MutableDouble> addTo,
java.util.Map<T,MutableDouble> toAdd)
adds the second sparse vector to the first, modifying the first
|
static <S,T> void |
addTo2(java.util.Map<T,java.util.HashMap<S,MutableDouble>> addTo,
java.util.Map<T,java.util.HashMap<S,MutableDouble>> toAdd) |
static double |
binaryJaccard(java.util.Map<?,MutableDouble> c1,
java.util.Map<?,MutableDouble> c2) |
static <S,T,M extends java.util.Map<T,MutableDouble>> |
conditionPMI(java.util.Map<S,M> coOccurrence)
Every entry in the co-occurrence matrix is replaced by the pointwise mutual information of the
first key and the second key. The frequency of the first key is given in firstFreq, the total is
in firstTotal, so that p(first) is firstFreq.get(first)/firstTotal; similarly for second. coTotal
is given in the second so that p(first & second) = coOccurrence.get(first).get(second)/coTotal.
In this version, the coOccurrence matrix is assumed to be complete, so that the firstFreq,
secondFreq and all totals can be inferred from coOccurrence.
|
static <S,T,M extends java.util.Map<T,MutableDouble>> |
conditionPMI(java.util.Map<S,M> coOccurrence,
java.util.Map<S,MutableDouble> firstFreq,
java.util.Map<T,MutableDouble> secondFreq,
double firstTotal,
double secondTotal,
double coTotal)
http://en.wikipedia.org/wiki/Pointwise_mutual_information Every entry in the co-occurrence
matrix is replaced by the pointwise mutual information of the first key and the second key. The
frequency of the first key is given in firstFreq, the total is in firstTotal, so that p(first)
is firstFreq.get(first)/firstTotal; similarly for second. coTotal is given in the second so that
p(first & second) = coOccurrence.get(first).get(second)/coTotal. The pmi(x,y) is log(p(x&y) /
(p(x) * p(y))), but the matrix gives frequencies.
|
static <T> java.util.HashMap<T,MutableDouble> |
copyValues(java.util.Map<T,MutableDouble> map) |
static <K> double |
cosineSimilarity(java.util.Map<K,MutableDouble> c1,
java.util.Map<K,MutableDouble> c2)
http://en.wikipedia.org/wiki/Cosine_similarity if it would be NaN, it is instead zero
|
static <T> void |
divideBy(java.util.Map<T,MutableDouble> m,
java.util.Map<T,MutableDouble> div) |
static <T> void |
divideByAdd1(java.util.Map<T,MutableDouble> m,
java.util.Map<T,MutableDouble> div)
Divides each entry in m by the corresponding entry in div with missing div = 0, adds 1 to each div
|
static <K> double |
dotProduct(java.util.Map<K,MutableDouble> x,
java.util.Map<K,MutableDouble> y)
It is just the mathematical notion of dot product: the sum of the products of each dimension.
|
static <T> boolean |
equals(java.util.Map<T,MutableDouble> m1,
java.util.Map<T,MutableDouble> m2) |
static <S> double |
euclidean(java.util.Map<S,MutableDouble> c1,
java.util.Map<S,MutableDouble> c2) |
static <T> java.util.HashMap<T,MutableDouble> |
fromImmutable(java.util.Map<T,java.lang.Double> im) |
static java.util.HashMap<java.lang.String,MutableDouble> |
fromString(java.lang.String map) |
static java.util.HashMap<java.lang.String,MutableDouble> |
fromTSVString(java.lang.Iterable<java.lang.String> lines,
double min) |
static java.util.HashMap<java.lang.String,MutableDouble> |
fromTSVString(java.lang.String tsv,
double min) |
static <K1,K2> MutableDouble |
get(java.util.Map<K1,java.util.HashMap<K2,MutableDouble>> map,
K1 key1,
K2 key2,
double deflt) |
static <K> MutableDouble |
get(java.util.Map<K,MutableDouble> map,
K key,
double deflt) |
static <K> double |
getDefaultZero(java.util.Map<K,MutableDouble> m,
K key)
gets the value for the dimension key.
|
static int[] |
getHisto(java.util.Map<?,MutableDouble> m,
double[] thresholds)
thresholds is assumed sorted least to greatest. The length of the returned value is one larger
than thresholds. The returned value in position i is equal to the number of entries in the
sparse vector greater than thresholds[i-1] and less than thresholds[i]. At the first position
the value is equal to the number of entries in the sparse vector less than any threshold; at the
final position the value is equal to the number of entries greater than any threshold.
|
static <T> java.util.Collection<T> |
getKeyDims(java.util.Map<T,MutableDouble> map,
int numDims)
Returns the keys associated with the numDims greatest values
|
static double |
getMean(java.util.Map<?,MutableDouble> m)
returns the average value
|
static <T> java.util.List<SparseVectors.OverlapRecord<T>> |
getOverlap(java.util.Map<T,MutableDouble> m1,
java.util.Map<T,MutableDouble> m2)
returns a sorted list with an OverlapRecord for every dimension in which both vectors have a
value greater than zero
|
static double |
getVariance(java.util.Map<?,MutableDouble> m,
double mean)
returns the variance of the values in m, assuming that their mean is 'mean'
|
static <S,T> boolean |
increase(java.util.Map<S,java.util.HashMap<T,MutableDouble>> doubleMap,
S key1,
T key2,
double value)
increase the entry corresponding to key1,key2 by value; return true iff the entry was created
|
static <T> boolean |
increase(java.util.Map<T,MutableDouble> map,
T key,
double value)
increase the entry corresponding to key by value; return true iff the entry was created
|
static <T> void |
linearScaleUnitVariance(java.util.Map<T,MutableDouble> m) |
static <T> Pair<T,MutableDouble> |
maxEntry(java.util.Map<T,MutableDouble> map) |
static <T> T |
maxKey(java.util.Map<T,MutableDouble> map)
returns the key for the maximum value, null if empty
|
static MutableDouble |
maxValue(java.util.Map<?,MutableDouble> map)
returns the maximum value, null if empty
|
static MutableDouble |
minValue(java.util.Map<?,MutableDouble> map) |
static double |
normalize(java.util.Map<?,MutableDouble> m)
scale the vector such that its twoNorm is now 1.0; return the twoNorm before it was scaled
|
static double |
normalizeOne(java.util.Map<?,MutableDouble> m)
scale the vector such that its oneNorm is now 1.0; return the oneNorm before it was scaled
|
static double |
oneNorm(java.util.Map<?,MutableDouble> x)
oneNorm = L1 norm = Manhattan norm http://en.wikipedia.org/wiki/Norm_(mathematics)
|
static <T> java.lang.Double |
rank(java.util.Map<T,MutableDouble> map,
T key)
Rank 1 means key is the maxKey, rank 2 means one entry is greater...
|
static void |
scale(java.util.Map<?,MutableDouble> m,
double scalingFactor)
multiply every entry by the scalingFactor
|
static <T> void |
set(java.util.Map<T,MutableDouble> toSet,
java.util.Map<T,MutableDouble> values) |
static <T> boolean |
set(java.util.Map<T,MutableDouble> map,
T key,
double setTo) |
static <T> boolean |
setMax(java.util.Map<T,MutableDouble> map,
T key,
double maybeMax) |
static <T> java.util.List<Pair<T,MutableDouble>> |
sorted(java.util.Map<T,MutableDouble> m)
least to greatest
|
static <T> java.util.List<Pair<T,MutableDouble>> |
sorted(java.util.Map<T,MutableDouble> m,
SecondPairComparator<T,MutableDouble> comp)
returns the sparse vector as a list of pairs, sorted by comp
|
static <T> java.util.List<Pair<T,MutableDouble>> |
sortedReverse(java.util.Map<T,MutableDouble> m)
greatest to least
|
static java.lang.String |
stringHisto(double[] thresholds,
int[] counts)
produces a nicely formatted display of the thresholds and counts. The number of lines is equal to
the number of counts, which is equal to the number of thresholds plus one.
|
static <T> void |
subtract(java.util.Map<T,MutableDouble> from,
java.util.Map<T,MutableDouble> x,
boolean newEntries)
The values in 'from' are reduced by the corresponding amount in x. If newEntries is true, a
missing (zero) value in 'from' for 'key' will result in a new entry that is negative x.get(key).
Only the first sparse vector is modified.
|
static double |
sum(java.util.Map<?,MutableDouble> c1)
return the sum of the entries in the vector
|
static double |
sum2(java.util.Map<?,? extends java.util.Map<?,MutableDouble>> c1)
return the sum of the entries in the matrix
|
static <S,T,M extends java.util.Map<T,MutableDouble>> |
thresholds(java.util.Map<S,M> doubleMap,
double minValue,
double maxValue) |
static <S,T,M extends java.util.Map<T,MutableDouble>> |
thresholdsAbs(java.util.Map<S,M> doubleMap,
double minValue,
double maxValue) |
static double[] |
toDense(java.util.Map<java.lang.Integer,MutableDouble> m) |
static <T> java.util.HashMap<T,java.lang.Double> |
toImmutable(java.util.Map<T,MutableDouble> m) |
static <T> java.lang.String |
toString(java.util.List<SparseVectors.OverlapRecord<T>> overlap,
int topN) |
static <K> java.lang.String |
toString(java.util.Map<K,MutableDouble> x)
For each entry, one per line, it returns the toString of the key, then a space, then the value.
The entries are sorted by absolute value, greatest to least. The toString of the key is padded
with spaces on the left so that the keys line up on the right.
|
static <K> java.lang.String |
toString(java.util.Map<K,MutableDouble> x,
int topN)
For each entry, one per line, it returns the toString of the key, then a space, then the value.
The topN entries are sorted by absolute value, greatest to least. The toString of the key is
padded with spaces on the left so that the keys line up on the right.
|
static <T> java.lang.String |
toTSVString(java.util.Map<T,MutableDouble> m) |
static <T> void |
toTSVString(java.util.Map<T,MutableDouble> m,
java.lang.String filename) |
static <K> void |
trimByThreshold(java.util.Map<K,MutableDouble> map,
double removeBelow)
Removes all entries less than 'removeBelow'
|
static <K> void |
trimByThresholdAbs(java.util.Map<K,MutableDouble> hashMap,
double removeBelow) |
static <T> void |
trimDimensions(java.util.Map<T,MutableDouble> map,
double retainLength) |
static <S,T,M extends java.util.Map<T,MutableDouble>> |
trimDimensions2(java.util.Map<S,M> coOccurrence,
double retainLength) |
static <S,T,M extends java.util.Map<T,MutableDouble>> |
trimDouble(java.util.Map<S,M> coOccurrence,
double minFirst,
double minSecond) |
static <E> int |
trimDoubleByThreshold(java.util.Map<E,? extends java.util.Map<E,MutableDouble>> doubleMap,
double removeBelow)
removes all entries less than 'removeBelow', returns total number of entries in the sparse
matrix
|
static <T> void |
trimToTopN(java.util.Map<T,MutableDouble> map,
int maxSize) |
static double |
twoNorm(java.util.Map<?,MutableDouble> x)
twoNorm = L2 norm = Euclidean norm http://en.wikipedia.org/wiki/Norm_(mathematics)
|
public static <K> double dotProduct(java.util.Map<K,MutableDouble> x, java.util.Map<K,MutableDouble> y)
x
- y
- public static double normalize(java.util.Map<?,MutableDouble> m)
m
- public static double normalizeOne(java.util.Map<?,MutableDouble> m)
m
- public static void scale(java.util.Map<?,MutableDouble> m, double scalingFactor)
m
- scalingFactor
- public static double twoNorm(java.util.Map<?,MutableDouble> x)
x
- public static double oneNorm(java.util.Map<?,MutableDouble> x)
x
- public static double sum(java.util.Map<?,MutableDouble> c1)
c1
- public static double sum2(java.util.Map<?,? extends java.util.Map<?,MutableDouble>> c1)
c1
- public static <T> boolean increase(java.util.Map<T,MutableDouble> map, T key, double value)
map
- key
- value
- public static <S,T> boolean increase(java.util.Map<S,java.util.HashMap<T,MutableDouble>> doubleMap, S key1, T key2, double value)
doubleMap
- key1
- key2
- value
- public static double getMean(java.util.Map<?,MutableDouble> m)
m
- public static double getVariance(java.util.Map<?,MutableDouble> m, double mean)
m
- mean
- public static <T> T maxKey(java.util.Map<T,MutableDouble> map)
map
- public static MutableDouble maxValue(java.util.Map<?,MutableDouble> map)
map
- public static <K> void trimByThreshold(java.util.Map<K,MutableDouble> map, double removeBelow)
map
- removeBelow
- public static <E> int trimDoubleByThreshold(java.util.Map<E,? extends java.util.Map<E,MutableDouble>> doubleMap, double removeBelow)
doubleMap
- removeBelow
- public static <K> double getDefaultZero(java.util.Map<K,MutableDouble> m, K key)
m
- key
- public static <T> void addTo(java.util.Map<T,MutableDouble> addTo, java.util.Map<T,MutableDouble> toAdd)
addTo
- toAdd
- public static <S,T> void addTo2(java.util.Map<T,java.util.HashMap<S,MutableDouble>> addTo, java.util.Map<T,java.util.HashMap<S,MutableDouble>> toAdd)
public static <T> void subtract(java.util.Map<T,MutableDouble> from, java.util.Map<T,MutableDouble> x, boolean newEntries)
from
- x
- newEntries
- public static <K> double cosineSimilarity(java.util.Map<K,MutableDouble> c1, java.util.Map<K,MutableDouble> c2)
c1
- c2
- public static int[] getHisto(java.util.Map<?,MutableDouble> m, double[] thresholds)
m
- thresholds
- public static java.lang.String stringHisto(double[] thresholds, int[] counts)
thresholds
- counts
- public static <K> java.lang.String toString(java.util.Map<K,MutableDouble> x)
x
- public static <K> java.lang.String toString(java.util.Map<K,MutableDouble> x, int topN)
x
- topN
- public static <T> java.util.Collection<T> getKeyDims(java.util.Map<T,MutableDouble> map, int numDims)
map
- numDims
- public static <T> java.util.List<Pair<T,MutableDouble>> sorted(java.util.Map<T,MutableDouble> m, SecondPairComparator<T,MutableDouble> comp)
m
- public static <T> java.util.List<Pair<T,MutableDouble>> sorted(java.util.Map<T,MutableDouble> m)
m
- public static <T> java.util.List<Pair<T,MutableDouble>> sortedReverse(java.util.Map<T,MutableDouble> m)
m
- public static <T> java.util.List<SparseVectors.OverlapRecord<T>> getOverlap(java.util.Map<T,MutableDouble> m1, java.util.Map<T,MutableDouble> m2)
m1
- m2
- public static <S,T,M extends java.util.Map<T,MutableDouble>> void conditionPMI(java.util.Map<S,M> coOccurrence, java.util.Map<S,MutableDouble> firstFreq, java.util.Map<T,MutableDouble> secondFreq, double firstTotal, double secondTotal, double coTotal)
coOccurrence
- firstFreq
- secondFreq
- firstTotal
- secondTotal
- coTotal
- public static <S,T,M extends java.util.Map<T,MutableDouble>> void conditionPMI(java.util.Map<S,M> coOccurrence)
coOccurrence
- public static <T> void toTSVString(java.util.Map<T,MutableDouble> m, java.lang.String filename)
public static <T> java.lang.String toTSVString(java.util.Map<T,MutableDouble> m)
public static java.util.HashMap<java.lang.String,MutableDouble> fromTSVString(java.lang.String tsv, double min)
public static java.util.HashMap<java.lang.String,MutableDouble> fromTSVString(java.lang.Iterable<java.lang.String> lines, double min)
public static <T> boolean equals(java.util.Map<T,MutableDouble> m1, java.util.Map<T,MutableDouble> m2)
public static <T> java.util.HashMap<T,MutableDouble> copyValues(java.util.Map<T,MutableDouble> map)
public static <S,T,M extends java.util.Map<T,MutableDouble>> void trimDimensions2(java.util.Map<S,M> coOccurrence, double retainLength)
public static <T> void trimDimensions(java.util.Map<T,MutableDouble> map, double retainLength)
public static <T> void trimToTopN(java.util.Map<T,MutableDouble> map, int maxSize)
public static <T> java.util.HashMap<T,MutableDouble> fromImmutable(java.util.Map<T,java.lang.Double> im)
public static <T> java.util.HashMap<T,java.lang.Double> toImmutable(java.util.Map<T,MutableDouble> m)
public static double[] toDense(java.util.Map<java.lang.Integer,MutableDouble> m)
public static <T> boolean setMax(java.util.Map<T,MutableDouble> map, T key, double maybeMax)
public static <K> MutableDouble get(java.util.Map<K,MutableDouble> map, K key, double deflt)
public static <K1,K2> MutableDouble get(java.util.Map<K1,java.util.HashMap<K2,MutableDouble>> map, K1 key1, K2 key2, double deflt)
public static <T> void set(java.util.Map<T,MutableDouble> toSet, java.util.Map<T,MutableDouble> values)
public static <T> boolean set(java.util.Map<T,MutableDouble> map, T key, double setTo)
public static MutableDouble minValue(java.util.Map<?,MutableDouble> map)
public static <K> void trimByThresholdAbs(java.util.Map<K,MutableDouble> hashMap, double removeBelow)
public static java.util.HashMap<java.lang.String,MutableDouble> fromString(java.lang.String map)
public static <T> Pair<T,MutableDouble> maxEntry(java.util.Map<T,MutableDouble> map)
public static <T> java.lang.Double rank(java.util.Map<T,MutableDouble> map, T key)
map
- key
- public static <T> void divideByAdd1(java.util.Map<T,MutableDouble> m, java.util.Map<T,MutableDouble> div)
m
- div
- public static <T> void divideBy(java.util.Map<T,MutableDouble> m, java.util.Map<T,MutableDouble> div)
public static <T> void linearScaleUnitVariance(java.util.Map<T,MutableDouble> m)
public static <S> double euclidean(java.util.Map<S,MutableDouble> c1, java.util.Map<S,MutableDouble> c2)
public static <S,T,M extends java.util.Map<T,MutableDouble>> void trimDouble(java.util.Map<S,M> coOccurrence, double minFirst, double minSecond)
public static <S,T,M extends java.util.Map<T,MutableDouble>> int thresholds(java.util.Map<S,M> doubleMap, double minValue, double maxValue)
public static <S,T,M extends java.util.Map<T,MutableDouble>> int thresholdsAbs(java.util.Map<S,M> doubleMap, double minValue, double maxValue)
public static <T> java.lang.String toString(java.util.List<SparseVectors.OverlapRecord<T>> overlap, int topN)
public static double binaryJaccard(java.util.Map<?,MutableDouble> c1, java.util.Map<?,MutableDouble> c2)