public class CleanXmlAnnotator extends java.lang.Object implements Annotator
Modifier and Type | Field and Description |
---|---|
static boolean |
DEFAULT_ALLOW_FLAWS |
static java.lang.String |
DEFAULT_DATE_TAGS |
static java.lang.String |
DEFAULT_DOC_ANNOTATIONS_PATTERNS |
static java.lang.String |
DEFAULT_DOCID_TAGS |
static java.lang.String |
DEFAULT_DOCTYPE_TAGS |
static java.lang.String |
DEFAULT_QUOTE_AUTHOR_ATTRIBUTES |
static java.lang.String |
DEFAULT_QUOTE_TAGS |
static java.lang.String |
DEFAULT_SECTION_ANNOTATIONS_PATTERNS |
static java.lang.String |
DEFAULT_SECTION_TAGS |
static java.lang.String |
DEFAULT_SENTENCE_ENDERS |
static java.lang.String |
DEFAULT_SINGLE_SENTENCE_TAGS |
static java.lang.String |
DEFAULT_SPEAKER_TAGS |
static java.lang.String |
DEFAULT_TOKEN_ANNOTATIONS_PATTERNS |
static java.lang.String |
DEFAULT_UTTERANCE_TURN_TAGS |
static java.lang.String |
DEFAULT_XML_TAGS |
Fields inherited from interface Annotator:
DEFAULT_REQUIREMENTS, STANFORD_CDC_TOKENIZE, STANFORD_CLEAN_XML, STANFORD_COLUMN_DATA_CLASSIFIER, STANFORD_COREF, STANFORD_COREF_MENTION, STANFORD_DEPENDENCIES, STANFORD_DETERMINISTIC_COREF, STANFORD_DOCDATE, STANFORD_ENTITY_MENTIONS, STANFORD_GENDER, STANFORD_KBP, STANFORD_LEMMA, STANFORD_LINK, STANFORD_MWT, STANFORD_NATLOG, STANFORD_NER, STANFORD_OPENIE, STANFORD_PARSE, STANFORD_POS, STANFORD_QUOTE, STANFORD_QUOTE_ATTRIBUTION, STANFORD_REGEXNER, STANFORD_RELATION, STANFORD_SENTIMENT, STANFORD_SSPLIT, STANFORD_TOKENIZE, STANFORD_TOKENSREGEX, STANFORD_TRUECASE, STANFORD_UD_FEATURES
Constructor and Description |
---|
CleanXmlAnnotator() |
CleanXmlAnnotator(java.util.Properties properties) |
CleanXmlAnnotator(java.lang.String xmlTagsToRemove,
java.lang.String sentenceEndingTags,
java.lang.String dateTags,
boolean allowFlawedXml) |
Modifier and Type | Method and Description |
---|---|
void |
annotate(Annotation annotation)
Given an Annotation, perform a task on this Annotation.
|
java.util.List<CoreLabel> |
process(java.util.List<CoreLabel> tokens) |
java.util.Set<java.lang.Class<? extends CoreAnnotation>> |
requirementsSatisfied()
Returns the set of requirements (tasks) that this annotator can
provide.
|
java.util.Set<java.lang.Class<? extends CoreAnnotation>> |
requires()
Returns the set of tasks that this annotator requires in order
to run.
|
void |
setDiscourseTags(java.lang.String utteranceTurnTags,
java.lang.String speakerTags) |
void |
setDocAnnotationPatterns(java.lang.String conf) |
void |
setDocIdTagMatcher(java.lang.String docIdTags) |
void |
setDocTypeTagMatcher(java.lang.String docTypeTags) |
void |
setQuoteTagMatcher(java.lang.String quoteTags) |
void |
setSectionAnnotationPatterns(java.lang.String conf) |
void |
setSectionTagMatcher(java.lang.String sectionTags) |
void |
setSingleSentenceTagMatcher(java.lang.String tags) |
void |
setSsplitDiscardTokensMatcher(java.lang.String tags) |
void |
setTokenAnnotationPatterns(java.lang.String conf) |
void |
setTokenBeginTokenEnd(java.util.List<CoreLabel> tokensList)
Helper method to set the TokenBeginAnnotation and TokenEndAnnotation of every token.
|
Methods inherited from class java.lang.Object:
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Annotator:
exactRequirements, unmount
public static final java.lang.String DEFAULT_XML_TAGS
public static final java.lang.String DEFAULT_SENTENCE_ENDERS
public static final java.lang.String DEFAULT_SINGLE_SENTENCE_TAGS
public static final java.lang.String DEFAULT_DATE_TAGS
public static final java.lang.String DEFAULT_DOCID_TAGS
public static final java.lang.String DEFAULT_DOCTYPE_TAGS
public static final java.lang.String DEFAULT_UTTERANCE_TURN_TAGS
public static final java.lang.String DEFAULT_SPEAKER_TAGS
public static final java.lang.String DEFAULT_DOC_ANNOTATIONS_PATTERNS
public static final java.lang.String DEFAULT_TOKEN_ANNOTATIONS_PATTERNS
public static final java.lang.String DEFAULT_SECTION_TAGS
public static final java.lang.String DEFAULT_QUOTE_TAGS
public static final java.lang.String DEFAULT_QUOTE_AUTHOR_ATTRIBUTES
public static final java.lang.String DEFAULT_SECTION_ANNOTATIONS_PATTERNS
public static final boolean DEFAULT_ALLOW_FLAWS
public CleanXmlAnnotator()
public CleanXmlAnnotator(java.util.Properties properties)
public CleanXmlAnnotator(java.lang.String xmlTagsToRemove, java.lang.String sentenceEndingTags, java.lang.String dateTags, boolean allowFlawedXml)
public void setSsplitDiscardTokensMatcher(java.lang.String tags)
public void setSingleSentenceTagMatcher(java.lang.String tags)
public void setDocIdTagMatcher(java.lang.String docIdTags)
public void setDocTypeTagMatcher(java.lang.String docTypeTags)
public void setSectionTagMatcher(java.lang.String sectionTags)
public void setQuoteTagMatcher(java.lang.String quoteTags)
public void setDiscourseTags(java.lang.String utteranceTurnTags, java.lang.String speakerTags)
public void setDocAnnotationPatterns(java.lang.String conf)
public void setTokenAnnotationPatterns(java.lang.String conf)
public void setSectionAnnotationPatterns(java.lang.String conf)
public void setTokenBeginTokenEnd(java.util.List<CoreLabel> tokensList)
public void annotate(Annotation annotation)
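Description copied from interface: Annotator
Given an Annotation, perform a task on this Annotation.
Specified by: annotate in interface Annotator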
public java.util.Set<java.lang.Class<? extends CoreAnnotation>> requires()
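Description copied from interface: Annotator
Returns the set of tasks that this annotator requires in order to run.
Specified by: requires in interface Annotator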
public java.util.Set<java.lang.Class<? extends CoreAnnotation>> requirementsSatisfied()
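Description copied from interface: Annotator
Returns the set of requirements (tasks) that this annotator can provide.
Specified by: requirementsSatisfied in interface Annotator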