Package cc.mallet.topics
Class WeightedTopicModel
- java.lang.Object
-
- cc.mallet.topics.WeightedTopicModel
-
- All Implemented Interfaces:
java.io.Serializable
public class WeightedTopicModel extends java.lang.Object implements java.io.Serializable
- See Also:
- Serialized Form
-
-
Field Summary
Fields
Modifier and Type | Field | Description
protected double
alpha
protected Alphabet
alphabet
protected double
alphaSum
protected double
beta
protected double
betaSum
protected java.util.ArrayList<TopicAssignment>
data
protected java.text.NumberFormat
formatter
protected double[]
logCountRatioCache
protected double[][]
logTypeTopicWeights
protected int
numTopics
protected int
numTypes
protected int[]
oneDocTopicCounts
protected boolean
printLogLikelihood
protected Randoms
random
int
showTopicsInterval
static java.util.regex.Pattern
sourceWordPattern
static java.util.regex.Pattern
targetWordPattern
protected int[]
tokensPerTopic
protected LabelAlphabet
topicAlphabet
protected double[]
totalTopicWeights
protected int[][]
typeTopicCounts
protected double[][]
typeTopicWeights
protected com.carrotsearch.hppc.IntDoubleHashMap[]
typeTypeWeights
int
wordsPerTopic
-
Constructor Summary
Constructors
Constructor | Description
WeightedTopicModel(int numberOfTopics, double alphaSum, double beta, Randoms random)
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
void
addInstances(InstanceList training)
Alphabet
getAlphabet()
java.util.ArrayList<TopicAssignment>
getData()
MarginalProbEstimator
getEstimator()
int
getNumTopics()
LabelAlphabet
getTopicAlphabet()
int[]
getTopicTotals()
int[][]
getTypeTopicCounts()
static void
main(java.lang.String[] args)
void
printState(java.io.File f)
void
printState(java.io.PrintStream stream)
void
readTypeTypeWeights(java.io.File weightsFile)
void
sample(int iterations, boolean shouldInitialize, int docCycleCount)
protected void
sampleTopicsForOneDoc(FeatureSequence tokenSequence, FeatureSequence topicSequence, boolean initializing, boolean debugging)
void
setRandomSeed(int seed)
void
setTopicDisplay(int interval, int n)
java.lang.String
topWords(int numWords)
-
-
-
Field Detail
-
sourceWordPattern
public static java.util.regex.Pattern sourceWordPattern
-
targetWordPattern
public static java.util.regex.Pattern targetWordPattern
-
data
protected java.util.ArrayList<TopicAssignment> data
-
alphabet
protected Alphabet alphabet
-
topicAlphabet
protected LabelAlphabet topicAlphabet
-
numTopics
protected int numTopics
-
numTypes
protected int numTypes
-
alpha
protected double alpha
-
alphaSum
protected double alphaSum
-
beta
protected double beta
-
betaSum
protected double betaSum
-
oneDocTopicCounts
protected int[] oneDocTopicCounts
-
typeTopicCounts
protected int[][] typeTopicCounts
-
tokensPerTopic
protected int[] tokensPerTopic
-
typeTypeWeights
protected com.carrotsearch.hppc.IntDoubleHashMap[] typeTypeWeights
-
logTypeTopicWeights
protected double[][] logTypeTopicWeights
-
typeTopicWeights
protected double[][] typeTopicWeights
-
totalTopicWeights
protected double[] totalTopicWeights
-
showTopicsInterval
public int showTopicsInterval
-
wordsPerTopic
public int wordsPerTopic
-
random
protected Randoms random
-
formatter
protected java.text.NumberFormat formatter
-
printLogLikelihood
protected boolean printLogLikelihood
-
logCountRatioCache
protected double[] logCountRatioCache
-
-
Constructor Detail
-
WeightedTopicModel
public WeightedTopicModel(int numberOfTopics, double alphaSum, double beta, Randoms random)
-
-
Method Detail
-
getAlphabet
public Alphabet getAlphabet()
-
getTopicAlphabet
public LabelAlphabet getTopicAlphabet()
-
getNumTopics
public int getNumTopics()
-
getData
public java.util.ArrayList<TopicAssignment> getData()
-
setTopicDisplay
public void setTopicDisplay(int interval, int n)
-
setRandomSeed
public void setRandomSeed(int seed)
-
getTypeTopicCounts
public int[][] getTypeTopicCounts()
-
getTopicTotals
public int[] getTopicTotals()
-
addInstances
public void addInstances(InstanceList training)
-
readTypeTypeWeights
public void readTypeTypeWeights(java.io.File weightsFile) throws java.lang.Exception
- Throws:
java.lang.Exception
-
sample
public void sample(int iterations, boolean shouldInitialize, int docCycleCount) throws java.io.IOException
- Throws:
java.io.IOException
-
sampleTopicsForOneDoc
protected void sampleTopicsForOneDoc(FeatureSequence tokenSequence, FeatureSequence topicSequence, boolean initializing, boolean debugging)
-
topWords
public java.lang.String topWords(int numWords)
-
getEstimator
public MarginalProbEstimator getEstimator()
-
printState
public void printState(java.io.File f) throws java.io.IOException
- Throws:
java.io.IOException
-
printState
public void printState(java.io.PrintStream stream)
-
main
public static void main(java.lang.String[] args) throws java.lang.Exception
- Throws:
java.lang.Exception
-
-