public class BasicEntityExtractor extends java.lang.Object implements Extractor
Modifier and Type | Field and Description |
---|---|
protected java.util.Set<java.lang.String> |
annotationsToSkip |
protected EntityMentionFactory |
entityMentionFactory |
protected java.lang.String |
gazetteerLocation |
java.util.logging.Logger |
logger |
protected boolean |
useBIO |
protected boolean |
useNERTags |
protected boolean |
useSubTypes |
Constructor and Description |
---|
BasicEntityExtractor(java.lang.String gazetteerLocation,
boolean useSubTypes,
java.util.Set<java.lang.String> annotationsToSkip,
boolean useBIO,
EntityMentionFactory factory,
boolean useNERTags) |
Modifier and Type | Method and Description |
---|---|
void |
annotate(Annotation doc)
Annotate an ExtractionDataSet with entities.
|
java.lang.String |
getEntityTypeForTag(java.lang.String tag) |
static java.lang.String |
labeledSentenceToString(java.util.List<CoreLabel> labeledSentence,
boolean printNer)
For printing a labeled sentence in a less verbose manner.
|
static BasicEntityExtractor |
load(java.lang.String path,
java.lang.Class<? extends BasicEntityExtractor> entityClassifier,
boolean preferDefaultGazetteer)
Loads the model from disk.
|
void |
makeAnnotationFromAllNERTags(CoreMap sentence)
Converts NamedEntityTagAnnotation tags into
EntityMentions. |
void |
makeAnnotationFromGivenNERTag(CoreMap sentence,
java.lang.String nerTag,
java.lang.String entityType)
Converts NamedEntityTagAnnotation tags into
EntityMentions. |
void |
makeEntityMention(CoreMap sentence,
int start,
int end,
java.lang.String label,
java.util.List<EntityMention> entities,
int sentCount) |
EntityMention |
makeEntityMention(CoreMap sentence,
int start,
int end,
java.lang.String label,
java.lang.String identifier) |
static java.lang.String |
makeEntityMentionIdentifier(CoreMap sentence,
int sentCount,
int entId) |
void |
postprocessSentence(CoreMap sentence,
int sentCount) |
void |
runTestSet(java.util.List<java.util.List<CoreLabel>> testSet)
This should be called after the classifier has been trained and
parseAndTrain has been called to accumulate the test set.
This will return precision, recall and F1 measure.
|
void |
save(java.lang.String path)
Serializes this extractor to a file
|
static void |
saveCoNLL(java.io.PrintStream os,
java.util.List<java.util.List<CoreLabel>> sentences,
boolean alreadyBIO) |
static void |
saveCoNLLFiles(java.lang.String dir,
Annotation dataset,
boolean useSubTypes,
boolean alreadyBIO) |
void |
setAnnotationsToSkip(java.util.Set<java.lang.String> annotationsToSkip) |
void |
setLoggerLevel(java.util.logging.Level level) |
void |
train(Annotation doc)
Trains one extractor model using the given dataset
|
protected java.lang.String gazetteerLocation
protected java.util.Set<java.lang.String> annotationsToSkip
protected boolean useSubTypes
protected boolean useBIO
protected EntityMentionFactory entityMentionFactory
public final java.util.logging.Logger logger
protected boolean useNERTags
public BasicEntityExtractor(java.lang.String gazetteerLocation, boolean useSubTypes, java.util.Set<java.lang.String> annotationsToSkip, boolean useBIO, EntityMentionFactory factory, boolean useNERTags)
public void annotate(Annotation doc)
public java.lang.String getEntityTypeForTag(java.lang.String tag)
public void postprocessSentence(CoreMap sentence, int sentCount)
public void makeAnnotationFromGivenNERTag(CoreMap sentence, java.lang.String nerTag, java.lang.String entityType)
Converts NamedEntityTagAnnotation tags into EntityMentions. This
finds the longest sequence of NamedEntityTagAnnotation tags of the matching
type.
Parameters:
sentence - A sentence, ideally annotated with NamedEntityTagAnnotation
nerTag - The name of the NER tag to copy, e.g. "DATE".
entityType - The type of the EntityMention objects created
public void makeAnnotationFromAllNERTags(CoreMap sentence)
Converts NamedEntityTagAnnotation tags into EntityMentions. This
finds the longest sequence of NamedEntityTagAnnotation tags of the matching
type.
Parameters:
sentence - A sentence annotated with NamedEntityTagAnnotation
public void makeEntityMention(CoreMap sentence, int start, int end, java.lang.String label, java.util.List<EntityMention> entities, int sentCount)
public static java.lang.String makeEntityMentionIdentifier(CoreMap sentence, int sentCount, int entId)
public EntityMention makeEntityMention(CoreMap sentence, int start, int end, java.lang.String label, java.lang.String identifier)
public void runTestSet(java.util.List<java.util.List<CoreLabel>> testSet)
public void setAnnotationsToSkip(java.util.Set<java.lang.String> annotationsToSkip)
Parameters:
annotationsToSkip - The type of annotation to skip in assigning answer annotations
public void train(Annotation doc)
Extractor
public static void saveCoNLLFiles(java.lang.String dir, Annotation dataset, boolean useSubTypes, boolean alreadyBIO) throws java.io.IOException
Throws:
java.io.IOException
public static void saveCoNLL(java.io.PrintStream os, java.util.List<java.util.List<CoreLabel>> sentences, boolean alreadyBIO)
public static BasicEntityExtractor load(java.lang.String path, java.lang.Class<? extends BasicEntityExtractor> entityClassifier, boolean preferDefaultGazetteer) throws java.lang.ClassCastException, java.io.IOException, java.lang.ClassNotFoundException
Parameters:
path - The location of the model that was saved to disk
Throws:
java.lang.ClassCastException - if the model is the wrong format
java.io.IOException - if the model file doesn't exist or is otherwise unavailable/incomplete
java.lang.ClassNotFoundException - this would probably indicate a serious classpath problem
public void save(java.lang.String path) throws java.io.IOException
Extractor
public static java.lang.String labeledSentenceToString(java.util.List<CoreLabel> labeledSentence, boolean printNer)
public void setLoggerLevel(java.util.logging.Level level)
setLoggerLevel
in interface Extractor