public class MentionExtractor
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
protected java.lang.String |
currentDocumentID |
protected Dictionaries |
dictionaries |
protected int |
maxID
The maximum mention ID, used to prevent duplicate mention ID assignment
|
CorefMentionFinder |
mentionFinder |
protected Semantics |
semantics |
protected LogisticClassifier<java.lang.String,java.lang.String> |
singletonPredictor |
protected StanfordCoreNLP |
stanfordProcessor |
static boolean |
VERBOSE |
Constructor and Description |
---|
MentionExtractor(Dictionaries dict,
Semantics semantics) |
Modifier and Type | Method and Description |
---|---|
Document |
arrange(Annotation anno,
java.util.List<java.util.List<CoreLabel>> words,
java.util.List<Tree> trees,
java.util.List<java.util.List<Mention>> unorderedMentions) |
java.util.List<java.util.List<Mention>> |
arrange(Annotation anno,
java.util.List<java.util.List<CoreLabel>> words,
java.util.List<Tree> trees,
java.util.List<java.util.List<Mention>> unorderedMentions,
boolean doMergeLabels)
Post-processes the extracted mentions.
|
Document |
arrange(Annotation anno,
java.util.List<java.util.List<CoreLabel>> words,
java.util.List<Tree> trees,
java.util.List<java.util.List<Mention>> unorderedMentions,
java.util.List<java.util.List<Mention>> unorderedGoldMentions,
boolean doMergeLabels) |
static Tree |
findExactMatch(Tree tree,
int first,
int last)
Finds the tree that matches this span exactly
|
protected int |
getHeadIndex(Tree t) |
static void |
initializeUtterance(java.util.List<CoreLabel> tokens) |
protected static StanfordCoreNLP |
loadStanfordProcessor(java.util.Properties props)
Loads the Stanford processor, skipping unnecessary annotators
|
static void |
mergeLabels(Tree tree,
java.util.List<CoreLabel> sentence)
Sets the label of the leaf nodes of a Tree to be the CoreLabels in the given sentence.
|
Document |
nextDoc()
Extracts the info relevant for coref from the next document in the corpus
|
void |
resetDocs()
Reset so that we start at the beginning of the document collection
|
void |
setMentionFinder(CorefMentionFinder mentionFinder) |
protected java.lang.String currentDocumentID
protected final Dictionaries dictionaries
protected final Semantics semantics
public CorefMentionFinder mentionFinder
protected StanfordCoreNLP stanfordProcessor
protected LogisticClassifier<java.lang.String,java.lang.String> singletonPredictor
protected int maxID
public static final boolean VERBOSE
public MentionExtractor(Dictionaries dict, Semantics semantics)
public void setMentionFinder(CorefMentionFinder mentionFinder)
public Document nextDoc() throws java.lang.Exception
java.lang.Exception
public void resetDocs()
public Document arrange(Annotation anno, java.util.List<java.util.List<CoreLabel>> words, java.util.List<Tree> trees, java.util.List<java.util.List<Mention>> unorderedMentions) throws java.lang.Exception
java.lang.Exception
protected int getHeadIndex(Tree t)
public Document arrange(Annotation anno, java.util.List<java.util.List<CoreLabel>> words, java.util.List<Tree> trees, java.util.List<java.util.List<Mention>> unorderedMentions, java.util.List<java.util.List<Mention>> unorderedGoldMentions, boolean doMergeLabels) throws java.lang.Exception
java.lang.Exception
public java.util.List<java.util.List<Mention>> arrange(Annotation anno, java.util.List<java.util.List<CoreLabel>> words, java.util.List<Tree> trees, java.util.List<java.util.List<Mention>> unorderedMentions, boolean doMergeLabels) throws java.lang.Exception
words
- List of words in each sentence, in textual order
trees
- List of trees, one per sentence
unorderedMentions
- List of unordered, unprocessed mentions
Each mention MUST have startIndex and endIndex set!
Optionally, if scoring is desired, mentions must have mentionID and originalRef set.
All the other Mention fields are set here.
java.lang.Exception
public static void mergeLabels(Tree tree, java.util.List<CoreLabel> sentence)
public static Tree findExactMatch(Tree tree, int first, int last)
tree
- Leaves must be indexed!
first
- First element in the span (first position has offset 1)
last
- Last element included in the span (first position has offset 1)
protected static StanfordCoreNLP loadStanfordProcessor(java.util.Properties props)
public static void initializeUtterance(java.util.List<CoreLabel> tokens)