Package cc.mallet.topics
Class WeightedTopicModel
- java.lang.Object
-
- cc.mallet.topics.WeightedTopicModel
-
- All Implemented Interfaces:
java.io.Serializable
public class WeightedTopicModel extends java.lang.Object implements java.io.Serializable
- See Also:
- Serialized Form
-
-
Field Summary
Fields
| Modifier and Type | Field | Description |
| --- | --- | --- |
| protected double | alpha | |
| protected Alphabet | alphabet | |
| protected double | alphaSum | |
| protected double | beta | |
| protected double | betaSum | |
| protected java.util.ArrayList<TopicAssignment> | data | |
| protected java.text.NumberFormat | formatter | |
| protected double[] | logCountRatioCache | |
| protected double[][] | logTypeTopicWeights | |
| protected int | numTopics | |
| protected int | numTypes | |
| protected int[] | oneDocTopicCounts | |
| protected boolean | printLogLikelihood | |
| protected Randoms | random | |
| int | showTopicsInterval | |
| static java.util.regex.Pattern | sourceWordPattern | |
| static java.util.regex.Pattern | targetWordPattern | |
| protected int[] | tokensPerTopic | |
| protected LabelAlphabet | topicAlphabet | |
| protected double[] | totalTopicWeights | |
| protected int[][] | typeTopicCounts | |
| protected double[][] | typeTopicWeights | |
| protected com.carrotsearch.hppc.IntDoubleHashMap[] | typeTypeWeights | |
| int | wordsPerTopic | |
-
Constructor Summary
Constructors
| Constructor | Description |
| --- | --- |
| WeightedTopicModel(int numberOfTopics, double alphaSum, double beta, Randoms random) | |
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
| Modifier and Type | Method | Description |
| --- | --- | --- |
| void | addInstances(InstanceList training) | |
| Alphabet | getAlphabet() | |
| java.util.ArrayList<TopicAssignment> | getData() | |
| MarginalProbEstimator | getEstimator() | |
| int | getNumTopics() | |
| LabelAlphabet | getTopicAlphabet() | |
| int[] | getTopicTotals() | |
| int[][] | getTypeTopicCounts() | |
| static void | main(java.lang.String[] args) | |
| void | printState(java.io.File f) | |
| void | printState(java.io.PrintStream stream) | |
| void | readTypeTypeWeights(java.io.File weightsFile) | |
| void | sample(int iterations, boolean shouldInitialize, int docCycleCount) | |
| protected void | sampleTopicsForOneDoc(FeatureSequence tokenSequence, FeatureSequence topicSequence, boolean initializing, boolean debugging) | |
| void | setRandomSeed(int seed) | |
| void | setTopicDisplay(int interval, int n) | |
| java.lang.String | topWords(int numWords) | |
-
-
-
Field Detail
-
sourceWordPattern
public static java.util.regex.Pattern sourceWordPattern
-
targetWordPattern
public static java.util.regex.Pattern targetWordPattern
-
data
protected java.util.ArrayList<TopicAssignment> data
-
alphabet
protected Alphabet alphabet
-
topicAlphabet
protected LabelAlphabet topicAlphabet
-
numTopics
protected int numTopics
-
numTypes
protected int numTypes
-
alpha
protected double alpha
-
alphaSum
protected double alphaSum
-
beta
protected double beta
-
betaSum
protected double betaSum
-
oneDocTopicCounts
protected int[] oneDocTopicCounts
-
typeTopicCounts
protected int[][] typeTopicCounts
-
tokensPerTopic
protected int[] tokensPerTopic
-
typeTypeWeights
protected com.carrotsearch.hppc.IntDoubleHashMap[] typeTypeWeights
-
logTypeTopicWeights
protected double[][] logTypeTopicWeights
-
typeTopicWeights
protected double[][] typeTopicWeights
-
totalTopicWeights
protected double[] totalTopicWeights
-
showTopicsInterval
public int showTopicsInterval
-
wordsPerTopic
public int wordsPerTopic
-
random
protected Randoms random
-
formatter
protected java.text.NumberFormat formatter
-
printLogLikelihood
protected boolean printLogLikelihood
-
logCountRatioCache
protected double[] logCountRatioCache
-
-
Constructor Detail
-
WeightedTopicModel
public WeightedTopicModel(int numberOfTopics, double alphaSum, double beta, Randoms random)
-
-
Method Detail
-
getAlphabet
public Alphabet getAlphabet()
-
getTopicAlphabet
public LabelAlphabet getTopicAlphabet()
-
getNumTopics
public int getNumTopics()
-
getData
public java.util.ArrayList<TopicAssignment> getData()
-
setTopicDisplay
public void setTopicDisplay(int interval, int n)
-
setRandomSeed
public void setRandomSeed(int seed)
-
getTypeTopicCounts
public int[][] getTypeTopicCounts()
-
getTopicTotals
public int[] getTopicTotals()
-
addInstances
public void addInstances(InstanceList training)
-
readTypeTypeWeights
public void readTypeTypeWeights(java.io.File weightsFile) throws java.lang.Exception
- Throws:
java.lang.Exception
-
sample
public void sample(int iterations, boolean shouldInitialize, int docCycleCount) throws java.io.IOException
- Throws:
java.io.IOException
-
sampleTopicsForOneDoc
protected void sampleTopicsForOneDoc(FeatureSequence tokenSequence, FeatureSequence topicSequence, boolean initializing, boolean debugging)
-
topWords
public java.lang.String topWords(int numWords)
-
getEstimator
public MarginalProbEstimator getEstimator()
-
printState
public void printState(java.io.File f) throws java.io.IOException
- Throws:
java.io.IOException
-
printState
public void printState(java.io.PrintStream stream)
-
main
public static void main(java.lang.String[] args) throws java.lang.Exception
- Throws:
java.lang.Exception
-
-