00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #ifndef _SENSOR
00045 #define _SENSOR
00046
00047 #include "language.h"
00048 #include "regexp.h"
00049 #include "util.h"
00050 #include <string>
00051 #include <set>
00052 #include <map>
00053 #include <iostream>
00054 #include <fstream>
00055 #include <sstream>
00056
00057 using namespace std;
00058
00059
/// Kind of sensor. Every Sensor starts out as ST_WORD (see the Sensor
/// constructor). NOTE(review): presumably ST_PHRASE marks sensors whose
/// Extract() receives a start/end range instead of a word index - confirm.
enum SensorType
{
  ST_WORD,
  ST_PHRASE
};
00061
00062
00063 class Sensor
00064 {
00065 public:
00066
00067
00068 Sensor(string targ) { includeLocation = false; sensorType = ST_WORD; target=targ;}
00069 virtual ~Sensor() {}
00070
00071 bool IncludeLocation() { return includeLocation; }
00072 void IncludeLocation(bool val) { includeLocation = val; }
00073 SensorType getSensorType() { return sensorType; }
00074 string solve_target(const sentence &sent, int rec) {
00075 string ins;
00076 if (target=="w") ins = sent[rec].get_form();
00077 else if (target=="l") ins = sent[rec].get_lemma();
00078 else if (target=="t") ins = sent[rec].get_parole();
00079 return(ins);
00080 }
00081
00082
00083
00084 virtual void Extract( const sentence &sent,
00085 set<string> &outSet,
00086 int rec_OR_start,
00087 int targLoc_OR_end ) = 0;
00088
00089 protected:
00090 void Output( set<string> &outSet, string feat, int loc);
00091 SensorType sensorType;
00092 string target;
00093
00094 private:
00095 string& PostProcess( string &feat, const char* checkFeat );
00096 bool includeLocation;
00097 };
00098
00099
00100 class SensorData : public Sensor
00101 {
00102 public:
00103 SensorData(string targ) : Sensor(targ) {};
00104
00105 void Extract( const sentence &sent,set<string> &outSet,int rec,int targLoc );
00106 };
00107
00108
00109 class SensorMap : public Sensor
00110 {
00111 public:
00112
00113 SensorMap(char* targ, char* fname, char* dpath) : Sensor(targ) {
00114
00115 char filename[512];
00116
00117 if (fname[0] == '/')
00118 strcpy(filename,fname);
00119 else {
00120 strcpy(filename,dpath);
00121 strcat(filename,fname);
00122 }
00123
00124 ifstream mapFile(filename);
00125 if (!mapFile) {
00126 cerr << filename << " not found" << endl;
00127 exit(-1);
00128 }
00129
00130 string key, data;
00131 while(!mapFile.eof()) {
00132 mapFile>>key>>data;
00133 content.insert(pair<string, string>(key, data));
00134 }
00135 mapFile.close();
00136 };
00137
00138 void Extract( const sentence &sent,set<string> &outSet,int rec,int targLoc );
00139 private:
00140 map<string,string> content;
00141 };
00142
00143
00144 class SensorSet : public Sensor
00145 {
00146 public:
00147 SensorSet(char* targ, char* fname, char* dpath) : Sensor(targ) {
00148
00149 char filename[512];
00150
00151 if (fname[0] == '/')
00152 strcpy(filename,fname);
00153 else {
00154 strcpy(filename,dpath);
00155 strcat(filename,fname);
00156 }
00157
00158 ifstream setFile(filename);
00159 if (!setFile) {
00160 cerr << filename << " not found" << endl;
00161 exit(-1);
00162 }
00163
00164 string key;
00165 while(!setFile.eof()) {
00166 setFile>>key;
00167 content.insert(key);
00168 }
00169 setFile.close();
00170 };
00171
00172 void Extract( const sentence &sent,set<string> &outSet,int rec,int targLoc );
00173
00174 private:
00175 set<string> content;
00176 };
00177
00178 class SensorSetPart : public Sensor {
00179 public:
00180 SensorSetPart(char* targ, char* fname, char* dpath) : Sensor(targ) {
00181
00182 char filename[512];
00183
00184 if (fname[0] == '/')
00185 strcpy(filename,fname);
00186 else {
00187 strcpy(filename,dpath);
00188 strcat(filename,fname);
00189 }
00190
00191 ifstream setFile(filename);
00192 if (!setFile) {
00193 cerr << filename << " not found" << endl;
00194 exit(-1);
00195 }
00196
00197 string key;
00198 while(!setFile.eof()) {
00199 setFile>>key;
00200 content.insert(key);
00201 }
00202 setFile.close();
00203 };
00204
00205 void Extract( const sentence &sent,set<string> &outSet,int rec,int targLoc );
00206
00207 private:
00208 set<string> content;
00209 };
00210
00211
00212 class SensorMatchRE : public Sensor
00213 {
00214 public:
00215 SensorMatchRE(char* targ, char* expr) : Sensor(targ), expression(expr) {};
00216
00217 void Extract( const sentence &sent,set<string> &outSet,int rec,int targLoc );
00218
00219 private:
00220 RegEx expression;
00221 };
00222
00223
00224
00225 class SensorCheckMwRE : public Sensor
00226 {
00227 public:
00228 SensorCheckMwRE(char* targ, char* expr) : Sensor(targ)
00229 {
00230 mw_patrons = util::string2list(string(expr), ';');
00231 };
00232
00233 void Extract( const sentence &sent, set<string> &outSet, int rec, int targLoc );
00234
00235 private:
00236 list<string> mw_patrons;
00237 };
00238
00239
00240
00241 class SensorCheckRE : public Sensor
00242 {
00243 public:
00244
00245 SensorCheckRE(char* targ, char* expr) : Sensor(targ), expression(expr) {};
00246
00247 void Extract( const sentence &sent,set<string> &outSet,int rec,int targLoc );
00248
00249 private:
00250 RegEx expression;
00251 };
00252
00253 #endif
00254