public class TaggerConfig
extends java.util.Properties
Modifier and Type | Class and Description |
---|---|
static class |
TaggerConfig.Mode |
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
APPROXIMATE |
static java.lang.String |
ARCH |
static java.lang.String |
CLOSED_CLASS_THRESHOLD |
static java.lang.String |
CUR_WORD_MIN_FEATURE_THRESH |
static java.lang.String |
DEBUG |
static java.lang.String |
DEFAULT_REG_L1 |
static java.lang.String |
ENCODING |
static java.lang.String |
ENCODING_PROPERTY |
static java.lang.String |
ITERATIONS |
static java.lang.String |
LANG |
static java.lang.String |
LEARN_CLOSED_CLASS |
static java.lang.String |
MIN_FEATURE_THRESH |
static java.lang.String |
MIN_WORDS_LOCK_TAGS |
static java.lang.String |
NTHREADS |
static java.lang.String |
OCCURRING_TAGS_ONLY |
static java.lang.String |
OUTPUT_FILE |
static java.lang.String |
OUTPUT_FORMAT |
static java.lang.String |
OUTPUT_FORMAT_OPTIONS |
static java.lang.String |
POSSIBLE_TAGS_ONLY |
static java.lang.String |
RARE_WORD_MIN_FEATURE_THRESH |
static java.lang.String |
RARE_WORD_THRESH |
static java.lang.String |
SEARCH |
static java.lang.String |
SGML |
static java.lang.String |
SIGMA_SQUARED |
static java.lang.String |
TAG_INSIDE |
static java.lang.String |
TAG_SEPARATOR |
static java.lang.String |
TAG_SEPARATOR_PROPERTY |
static java.lang.String |
TOKENIZE |
static java.lang.String |
TOKENIZER_FACTORY |
static java.lang.String |
TOKENIZER_OPTIONS |
static java.lang.String |
VERBOSE |
static java.lang.String |
VERBOSE_RESULTS |
static java.lang.String |
VERY_COMMON_WORD_THRESH |
static java.lang.String |
WORD_FUNCTION |
static java.lang.String |
XML_INPUT |
Constructor and Description |
---|
TaggerConfig(java.util.Properties props) |
TaggerConfig(java.lang.String... args) |
TaggerConfig(TaggerConfig old)
We force you to pass in a TaggerConfig rather than any other
superclass so that we know the arg error checking has already occurred.
|
Modifier and Type | Method and Description |
---|---|
void |
dump() |
void |
dump(java.io.PrintStream stream) |
java.lang.String |
getArch() |
java.lang.String[] |
getClosedClassTags() |
int |
getClosedTagThreshold() |
int |
getCurWordMinFeatureThresh() |
boolean |
getDebug() |
java.lang.String |
getDebugPrefix() |
double |
getDefaultScore()
Returns a default score to be used for each tag that is incompatible with
the current word (e.g., the tag CC for the word "apple").
|
static java.lang.String |
getDefaultTagSeparator() |
java.lang.String |
getEncoding() |
java.lang.String |
getFile() |
int |
getIterations() |
java.lang.String |
getLang() |
boolean |
getLearnClosedClassTags() |
int |
getMinFeatureThresh() |
int |
getMinWordsLockTags() |
TaggerConfig.Mode |
getMode() |
java.lang.String |
getModel() |
int |
getNThreads() |
java.lang.String[] |
getOpenClassTags() |
java.lang.String |
getOutputFile() |
java.lang.String |
getOutputFormat() |
boolean |
getOutputLemmas() |
java.lang.String[] |
getOutputOptions() |
boolean |
getOutputOptionsContains(java.lang.String sought) |
boolean |
getOutputVerbosity() |
int |
getRareWordMinFeatureThresh() |
int |
getRareWordThresh() |
double |
getRegL1() |
java.lang.String |
getSearch() |
java.lang.String |
getSentenceDelimiter()
This returns the sentence delimiter used when tokenizing text
using the tokenizer requested in this config.
|
boolean |
getSGML() |
double |
getSigmaSquared() |
java.lang.String |
getTagInside()
Return a regex of XML elements to tag inside of.
|
java.lang.String |
getTagSeparator() |
boolean |
getTokenize() |
java.lang.String |
getTokenizerFactory() |
boolean |
getTokenizerInvertible() |
java.lang.String |
getTokenizerOptions() |
boolean |
getVerbose() |
boolean |
getVerboseResults() |
int |
getVeryCommonWordThresh() |
java.lang.String |
getWordFunction() |
java.lang.String[] |
getXMLInput() |
boolean |
keepEmptySentences() |
boolean |
occurringTagsOnly() |
boolean |
possibleTagsOnly() |
static TaggerConfig |
readConfig(java.io.DataInputStream stream)
Read in a TaggerConfig.
|
void |
saveConfig(java.io.OutputStream os)
Serialize the TaggerConfig.
|
void |
setProperties(java.util.Properties props) |
java.lang.String |
toString() |
boolean |
useStdin()
Returns whether or not we should use stdin for reading when
tagging data.
|
Methods inherited from class java.util.Properties: getProperty, getProperty, list, list, load, load, loadFromXML, propertyNames, save, setProperty, store, store, storeToXML, storeToXML, stringPropertyNames
Methods inherited from class java.util.Hashtable: clear, clone, compute, computeIfAbsent, computeIfPresent, contains, containsKey, containsValue, elements, entrySet, equals, forEach, get, getOrDefault, hashCode, isEmpty, keys, keySet, merge, put, putAll, putIfAbsent, rehash, remove, remove, replace, replace, replaceAll, size, values
public static final java.lang.String SEARCH
public static final java.lang.String TAG_SEPARATOR
public static final java.lang.String TOKENIZE
public static final java.lang.String DEBUG
public static final java.lang.String ITERATIONS
public static final java.lang.String ARCH
public static final java.lang.String WORD_FUNCTION
public static final java.lang.String RARE_WORD_THRESH
public static final java.lang.String MIN_FEATURE_THRESH
public static final java.lang.String CUR_WORD_MIN_FEATURE_THRESH
public static final java.lang.String RARE_WORD_MIN_FEATURE_THRESH
public static final java.lang.String VERY_COMMON_WORD_THRESH
public static final java.lang.String OCCURRING_TAGS_ONLY
public static final java.lang.String POSSIBLE_TAGS_ONLY
public static final java.lang.String SIGMA_SQUARED
public static final java.lang.String ENCODING
public static final java.lang.String LEARN_CLOSED_CLASS
public static final java.lang.String CLOSED_CLASS_THRESHOLD
public static final java.lang.String VERBOSE
public static final java.lang.String VERBOSE_RESULTS
public static final java.lang.String SGML
public static final java.lang.String LANG
public static final java.lang.String TOKENIZER_FACTORY
public static final java.lang.String XML_INPUT
public static final java.lang.String TAG_INSIDE
public static final java.lang.String APPROXIMATE
public static final java.lang.String TOKENIZER_OPTIONS
public static final java.lang.String DEFAULT_REG_L1
public static final java.lang.String OUTPUT_FILE
public static final java.lang.String OUTPUT_FORMAT
public static final java.lang.String OUTPUT_FORMAT_OPTIONS
public static final java.lang.String NTHREADS
public static final java.lang.String MIN_WORDS_LOCK_TAGS
public static final java.lang.String ENCODING_PROPERTY
public static final java.lang.String TAG_SEPARATOR_PROPERTY
public TaggerConfig(TaggerConfig old)
public TaggerConfig(java.lang.String... args)
public TaggerConfig(java.util.Properties props)
public void setProperties(java.util.Properties props)
public java.lang.String getModel()
public java.lang.String getFile()
public java.lang.String getOutputFile()
public java.lang.String getOutputFormat()
public java.lang.String[] getOutputOptions()
public boolean getOutputVerbosity()
public boolean getOutputLemmas()
public boolean keepEmptySentences()
public boolean getOutputOptionsContains(java.lang.String sought)
public java.lang.String getSearch()
public double getSigmaSquared()
public int getIterations()
public int getRareWordThresh()
public int getMinFeatureThresh()
public int getCurWordMinFeatureThresh()
public int getRareWordMinFeatureThresh()
public int getVeryCommonWordThresh()
public boolean occurringTagsOnly()
public boolean possibleTagsOnly()
public java.lang.String getLang()
public java.lang.String[] getOpenClassTags()
public java.lang.String[] getClosedClassTags()
public boolean getLearnClosedClassTags()
public int getClosedTagThreshold()
public java.lang.String getArch()
public java.lang.String getWordFunction()
public boolean getDebug()
public java.lang.String getDebugPrefix()
public java.lang.String getTokenizerFactory()
public static java.lang.String getDefaultTagSeparator()
public final java.lang.String getTagSeparator()
public boolean getTokenize()
public java.lang.String getEncoding()
public double getRegL1()
public java.lang.String[] getXMLInput()
public boolean getVerbose()
public boolean getVerboseResults()
public boolean getSGML()
public int getNThreads()
public int getMinWordsLockTags()
public java.lang.String getTagInside()
public java.lang.String getTokenizerOptions()
public boolean getTokenizerInvertible()
public double getDefaultScore()
public void dump()
public void dump(java.io.PrintStream stream)
public java.lang.String toString()
Overrides:
toString in class java.util.Hashtable<java.lang.Object,java.lang.Object>
public java.lang.String getSentenceDelimiter()
public boolean useStdin()
public TaggerConfig.Mode getMode()
public void saveConfig(java.io.OutputStream os) throws java.io.IOException
Parameters:
os - Where to write this TaggerConfig
Throws:
java.io.IOException - If any IO problems

public static TaggerConfig readConfig(java.io.DataInputStream stream) throws java.io.IOException, java.lang.ClassNotFoundException
Parameters:
stream - Where to read from
Throws:
java.io.IOException - Misc IOError
java.lang.ClassNotFoundException - Class error