00001 00002 // 00003 // FreeLing - Open Source Language Analyzers 00004 // 00005 // Copyright (C) 2004 TALP Research Center 00006 // Universitat Politecnica de Catalunya 00007 // 00008 // This library is free software; you can redistribute it and/or 00009 // modify it under the terms of the GNU Lesser General Public 00010 // License as published by the Free Software Foundation; either 00011 // version 2.1 of the License, or (at your option) any later version. 00012 // 00013 // This library is distributed in the hope that it will be useful, 00014 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 // Lesser General Public License for more details. 00017 // 00018 // You should have received a copy of the GNU Lesser General Public 00019 // License along with this library; if not, write to the Free Software 00020 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 // 00022 // contact: Lluis Padro (padro@lsi.upc.es) 00023 // TALP Research Center 00024 // despatx C6.212 - Campus Nord UPC 00025 // 08034 Barcelona. SPAIN 00026 // 00028 00029 #ifndef _LANGUAGE 00030 #define _LANGUAGE 00031 00032 #include <string> 00033 #include <list> 00034 #include <vector> 00035 00036 #include "tree.h" 00037 00038 using namespace std; 00039 00040 class word; // predeclaration 00041 00046 00047 class analysis { 00048 00049 private: 00051 string lemma; 00053 string parole; 00055 double prob; 00057 list<string> senses; 00059 list<word> retok; 00060 00061 public: 00063 analysis(); 00065 analysis(const string &, const string &); 00066 00067 void set_lemma(const string &); 00068 void set_parole(const string &); 00069 void set_prob(double); 00070 void set_retokenizable(const list<word> &); 00071 00072 bool has_prob(void) const; 00073 string get_lemma(void) const; 00074 string get_parole(void) const; 00075 string get_short_parole(const string &) const; 00076 double get_prob(void) const; 00077 bool is_retokenizable(void) const; 00078 list<word> get_retokenizable(void) const; 00079 00080 list<string> get_senses(void) const; 00081 void set_senses(const list<string> &); 00082 }; 00083 00084 00089 00090 class word : public list<analysis> { 00091 private: 00093 string form; 00095 word::iterator selected; 00097 list<word> multiword; 00099 unsigned int start, finish; 00101 bool in_dict; 00103 void *user; 00104 00105 public: 00107 word(); 00109 word(const string &); 00111 word(const string &, const list<word> &); 00113 word(const string &, const list<analysis> &, const list<word> &); 00115 word(const word &); 00117 word& operator=(const word&); 00118 00120 bool is_ambiguous(void) const; 00122 bool is_multiword(void) const; 00124 int get_n_words_mw(void) const; 00126 list<word> get_words_mw(void) const; 00128 string get_form(void) const; 00130 analysis get_selected_analysis(void) const; 00132 word::iterator selected_analysis(void) const; 00134 string get_lemma(void) const; 00136 string get_parole(void) const; 00138 string get_short_parole(const string &) const; 00139 00141 list<string> get_senses(void) const; 00143 void set_senses(const list<string> &); 00144 00146 unsigned int get_span_start(void) const; 00147 unsigned int get_span_finish(void) const; 00148 00150 bool found_in_dict(void) const; 00152 void set_found_in_dict(bool); 00153 00155 void add_analysis(const analysis &); 00157 void set_analysis(const analysis &); 00159 void set_analysis(const list<analysis> &); 00161 void set_form(const string &); 00163 void set_span(unsigned int, unsigned int); 00165 void set_user_data(void *); 00166 00168 int get_n_analysis(void) const; 00170 void copy_analysis(const word &); 00172 void select_analysis(word::iterator); 00174 list<analysis> get_analysis(void) const; 00176 word::iterator analysis_begin(void); 00177 word::const_iterator analysis_begin(void) const; 00179 word::iterator analysis_end(void); 00180 word::const_iterator analysis_end(void) const; 00181 }; 00182 00183 00184 00190 00191 class node { 00192 protected: 00194 bool head; 00196 int chunk; 00198 string label; 00200 word * w; 00201 00202 00203 public: 00205 node(); 00206 node(const string &); 00208 string get_label(void) const; 00210 word get_word(void) const; 00212 void set_label(const string &); 00214 void set_word(word &); 00216 bool is_head(void) const; 00218 void set_head(const bool); 00220 bool is_chunk(void) const; 00222 void set_chunk(const int); 00224 int get_chunk_ord(void) const; 00225 00226 00227 }; 00228 00232 00233 class parse_tree : public tree<node> { 00234 public: 00235 parse_tree(); 00236 parse_tree(parse_tree::iterator p); 00237 parse_tree(const node &); 00238 }; 00239 00240 00245 00246 class depnode : public node { 00247 00248 private: 00250 parse_tree::iterator itree; 00252 string dep_source; 00253 string dep_target; 00254 string dep_result; 00255 00256 public: 00257 depnode(); 00258 depnode(const string &); 00259 depnode(const node &); 00260 void set_link(const parse_tree::iterator); 00261 parse_tree::iterator get_link(void); 00262 00263 void set_dep_source(const string &); 00264 void set_dep_target(const string &); 00265 void set_dep_result(const string &); 00266 string get_dep_source(void) const; 00267 string get_dep_target(void) const; 00268 string get_dep_result(void) const; 00269 00270 }; 00271 00272 00273 00277 00278 class dep_tree : public tree<depnode> { 00279 public: 00280 dep_tree(); 00281 dep_tree(const depnode &); 00282 }; 00283 00284 00290 00291 class sentence : public vector<word> { 00292 private: 00293 parse_tree pt; 00294 dep_tree dt; 00295 00296 public: 00297 sentence(); 00298 00299 void set_parse_tree(const parse_tree &); 00300 parse_tree & get_parse_tree(void); 00301 bool is_parsed() const; 00302 00303 dep_tree & get_dep_tree(); 00304 void set_dep_tree(const dep_tree &); 00305 bool is_dep_parsed() const; 00306 00308 vector<word> get_words() const; 00310 sentence::iterator words_begin(void); 00311 sentence::const_iterator words_begin(void) const; 00312 sentence::iterator words_end(void); 00313 sentence::const_iterator words_end(void) const; 00314 }; 00315 00320 00321 class paragraph : public list<sentence> {}; 00322 00327 00328 class document : public list<paragraph> { 00329 paragraph title; 00330 }; 00331 00332 00333 #endif 00334