public class AceSentenceSegmenter extends DomReader
Constructor and Description |
---|
AceSentenceSegmenter() |
Modifier and Type | Method and Description |
---|---|
static void | main(java.lang.String[] args) |
static java.util.List<java.util.List<AceToken>> | tokenizeAndSegmentSentences(java.lang.String filenamePrefix) |
static AceToken | wordTokenToAceToken(RobustTokenizer.WordToken wordToken, int sentence) |
Methods inherited from class DomReader: getAttributeValue, getChildByAttribute, getChildByName, getChildByNameAndAttribute, getChildrenByName, readDocument
public static java.util.List<java.util.List<AceToken>> tokenizeAndSegmentSentences(java.lang.String filenamePrefix) throws java.io.IOException, org.xml.sax.SAXException, javax.xml.parsers.ParserConfigurationException
filenamePrefix - path to an ACE .sgm file (but not including the .sgm extension)
java.io.IOException
org.xml.sax.SAXException
javax.xml.parsers.ParserConfigurationException
public static AceToken wordTokenToAceToken(RobustTokenizer.WordToken wordToken, int sentence)
public static void main(java.lang.String[] args) throws java.io.IOException, org.xml.sax.SAXException, javax.xml.parsers.ParserConfigurationException
java.io.IOException
org.xml.sax.SAXException
javax.xml.parsers.ParserConfigurationException