public class ArabicSegmenter extends java.lang.Object implements WordSegmenter, ThreadsafeProcessor<java.lang.String,java.lang.String>
| Constructor and Description |
|---|
ArabicSegmenter(ArabicSegmenter other)
Copy constructor.
|
ArabicSegmenter(java.util.Properties props)
Make an Arabic Segmenter.
|
| Modifier and Type | Method and Description |
|---|---|
void |
finishTraining() |
static ArabicSegmenter |
getSegmenter(java.util.Properties options)
Train a new segmenter or load a trained model from file.
|
void |
initializeTraining(double numTrees) |
void |
loadSegmenter(java.lang.String filename) |
void |
loadSegmenter(java.lang.String filename,
java.util.Properties p) |
static void |
main(java.lang.String[] args) |
ThreadsafeProcessor<java.lang.String,java.lang.String> |
newInstance()
Return a new threadsafe instance.
|
java.lang.String |
process(java.lang.String nextInput)
Set the input item that will be processed when a thread is allocated to
this processor.
|
long |
segment(java.io.BufferedReader br,
java.io.PrintWriter pwOut)
Segment all strings from an input.
|
java.util.List<HasWord> |
segment(java.lang.String line) |
java.lang.String |
segmentString(java.lang.String line) |
java.util.List<CoreLabel> |
segmentStringToTokenList(java.lang.String line) |
void |
serializeSegmenter(java.lang.String filename) |
void |
train()
Train a segmenter from raw text.
|
void |
train(java.util.Collection<Tree> trees) |
void |
train(java.util.List<TaggedWord> sentence) |
void |
train(Tree tree) |
public ArabicSegmenter(java.util.Properties props)
props - Options for how to tokenize. See the main method of ArabicTokenizer for details.
public ArabicSegmenter(ArabicSegmenter other)
other - the segmenter to copy
public void initializeTraining(double numTrees)
initializeTraining in interface WordSegmenter
public void train(java.util.Collection<Tree> trees)
train in interface WordSegmenter
public void train(Tree tree)
train in interface WordSegmenter
public void train(java.util.List<TaggedWord> sentence)
train in interface WordSegmenter
public void finishTraining()
finishTraining in interface WordSegmenter
public java.lang.String process(java.lang.String nextInput)
Description copied from interface: ThreadsafeProcessor
process in interface ThreadsafeProcessor<java.lang.String,java.lang.String>
nextInput - the object to be processed
public ThreadsafeProcessor<java.lang.String,java.lang.String> newInstance()
Description copied from interface: ThreadsafeProcessor
newInstance in interface ThreadsafeProcessor<java.lang.String,java.lang.String>
public java.util.List<HasWord> segment(java.lang.String line)
segment in interface WordSegmenter
public java.util.List<CoreLabel> segmentStringToTokenList(java.lang.String line)
public java.lang.String segmentString(java.lang.String line)
public long segment(java.io.BufferedReader br,
java.io.PrintWriter pwOut)
br - input stream to segment
pwOut - output stream to write the segmented text
public void train()
public void serializeSegmenter(java.lang.String filename)
public void loadSegmenter(java.lang.String filename,
java.util.Properties p)
public void loadSegmenter(java.lang.String filename)
loadSegmenter in interface WordSegmenter
public static void main(java.lang.String[] args)
args - command-line arguments
public static ArabicSegmenter getSegmenter(java.util.Properties options)
options - Properties to specify segmenter behavior