next up previous contents
Next: Tokenizer rules file Up: Configuration File and Command Previous: Valid options   Contents


Sample Configuration File

#### Default configuration file for the Spanish analyzer
####-------- Trace options. Only effective if we have compiled with -DVERBOSE.
####-------- For development purposes only.
TraceLevel=0
TraceModule=0x0000
####-------- General options
Lang=es
# Input/output formats. (plain, token, splitted, morfo, tagged, sense, parsed, dep)
InputFormat=plain
OutputFormat=morfo
# consider each newline as a sentence end
AlwaysFlush=no
####-------- Tokenizer options
TokenizerFile="/usr/local/share/FreeLing/es/tokenizer.dat"
####-------- Splitter options
SplitterFile="/usr/local/share/FreeLing/es/splitter.dat"
####-------- Morfo options
SuffixAnalysis=yes
MultiwordsDetection=yes
NumbersDetection=yes
PunctuationDetection=yes
DatesDetection=yes
QuantitiesDetection=yes
DictionarySearch=yes
ProbabilityAssignment=yes
NERecognition=yes
DecimalPoint=","
ThousandPoint="."
LocutionsFile=/usr/local/share/FreeLing/es/locucions.dat
CurrencyFile=/usr/local/share/FreeLing/es/moneda.dat
SuffixFile=/usr/local/share/FreeLing/es/sufixos.dat
ProbabilityFile=/usr/local/share/FreeLing/es/probabilitats.dat
DictionaryFile=/usr/local/share/FreeLing/es/maco.db
NPDataFile=/usr/local/share/FreeLing/es/np.dat
PunctuationFile=/usr/local/share/FreeLing/common/punct.dat
ProbabilityThreshold=0.001
TitleLength=0
####-------- Tagger options
Tagger=hmm
TaggerHMMFile=/usr/local/share/FreeLing/es/tagger.dat
TaggerRelaxFile=/usr/local/share/FreeLing/es/constr_gram.dat
TaggerRelaxMaxIter=500
TaggerRelaxScaleFactor=670.0
TaggerRelaxEpsilon=0.001
####--------- NEC options
NEClassification=no
NECFilePrefix=/usr/local/share/FreeLing/es/nec
####--------- Sense annotation options
SenseAnnotation=none
SenseFile=/usr/local/share/FreeLing/es/senses.db
####--------- Parser options
GrammarFile=/usr/local/share/FreeLing/es/grammar-dep.dat
####--------- Dependency Parser options
HeuristicsFile=/usr/local/share/FreeLing/es/dependences.dat


next up previous contents
Next: Tokenizer rules file Up: Configuration File and Command Previous: Valid options   Contents
2006-04-26