public class CoNLLUReader
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
class | CoNLLUReader.CoNLLUDocument<br>class to store info for a CoNLL-U document |
class | CoNLLUReader.CoNLLUSentence<br>class to store info for a CoNLL-U sentence |
Modifier and Type | Field and Description |
---|---|
static java.util.HashMap<java.lang.String,java.lang.String> | classShorthandToFull<br>shorthands for CoreAnnotations |
int | columnCount |
static java.util.regex.Pattern | COMMENT_LINE<br>patterns to match in CoNLL-U file |
static int | CoNLLU_GovField |
static int | CoNLLU_IndexField<br>field constants |
static int | CoNLLU_LemmaField |
static int | CoNLLU_MiscField |
static int | CoNLLU_RelnField |
static int | CoNLLU_UPOSField |
static int | CoNLLU_WordField |
static int | CoNLLU_XPOSField |
static java.util.regex.Pattern | DOCUMENT_LINE |
static java.util.regex.Pattern | MWT_LINE |
static java.util.regex.Pattern | TOKEN_LINE |
Constructor and Description |
---|
CoNLLUReader() |
CoNLLUReader(java.util.Properties props) |
Modifier and Type | Method and Description |
---|---|
Annotation | convertCoNLLUDocumentToAnnotation(CoNLLUReader.CoNLLUDocument doc)<br>Convert a CoNLLUDocument into an Annotation. The convention is that a CoNLLU document represents a list of sentences, one sentence per line, separated by newline. |
CoreMap | convertCoNLLUSentenceToCoreMap(CoNLLUReader.CoNLLUDocument doc, CoNLLUReader.CoNLLUSentence sentence)<br>Convert a list of CoNLL-U token lines into a sentence CoreMap |
java.util.List<Annotation> | readCoNLLUFile(java.lang.String filePath)<br>Read a CoNLL-U file and generate a list of Annotations |
java.util.List<CoNLLUReader.CoNLLUDocument> | readCoNLLUFileCreateCoNLLUDocuments(java.lang.String filePath)<br>Read a CoNLL-U file and generate a list of CoNLLUDocument objects |
java.util.List<java.lang.String> | readCoNLLUFileCreateCoNLLXLines(java.lang.String filePath)<br>Read a CoNLL-U file and generate a list of CoNLL-X lines |
public static final int CoNLLU_IndexField
public static final int CoNLLU_WordField
public static final int CoNLLU_LemmaField
public static final int CoNLLU_UPOSField
public static final int CoNLLU_XPOSField
public static final int CoNLLU_GovField
public static final int CoNLLU_RelnField
public static final int CoNLLU_MiscField
public int columnCount
public static java.util.regex.Pattern COMMENT_LINE
public static java.util.regex.Pattern DOCUMENT_LINE
public static java.util.regex.Pattern MWT_LINE
public static java.util.regex.Pattern TOKEN_LINE
public static java.util.HashMap<java.lang.String,java.lang.String> classShorthandToFull
public CoNLLUReader() throws java.lang.ClassNotFoundException
Throws: java.lang.ClassNotFoundException
public CoNLLUReader(java.util.Properties props) throws java.lang.ClassNotFoundException
Throws: java.lang.ClassNotFoundException
public java.util.List<Annotation> readCoNLLUFile(java.lang.String filePath) throws java.io.IOException
Throws: java.io.IOException
public java.util.List<java.lang.String> readCoNLLUFileCreateCoNLLXLines(java.lang.String filePath) throws java.io.IOException
Throws: java.io.IOException
public java.util.List<CoNLLUReader.CoNLLUDocument> readCoNLLUFileCreateCoNLLUDocuments(java.lang.String filePath) throws java.io.IOException
Throws: java.io.IOException
public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUReader.CoNLLUDocument doc)
public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUReader.CoNLLUDocument doc, CoNLLUReader.CoNLLUSentence sentence)