Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members

regexp.h

Go to the documentation of this file.
00001 //-----------------------------------------------------------------------
00002 //
00003 // regex.hpp 1.0 Copyright (c) 2003 Peter Petersen (pp@on-time.de)
00004 // Simple C++ wrapper for PCRE
00005 //
00006 // This source file is freeware. You may use it for any purpose without
00007 // restriction except that the copyright notice as the top of this file as
00008 // well as this paragraph may not be removed or altered.
00009 //
00010 //-----------------------------------------------------------------------
00011 //
00012 //  CHANGES made to regexp.hpp 1.0 to make it suitable for FreeLing:
00013 //
00014 //    original function:  char * Match(int)
00015 //    converted to:       string Match(int)
00016 //
00017 //    original function:  RegEx (const char *, int)
00018 //    converted to:       RegEx (string, int)
00019 //
00020 //    Added:  Copy constructor: RegEx (const RegEx &)
00021 //            assignment:       RegEx& operator=(const RegEx &)
00022 //            clone auxiliary:  void clone(const RegEx &)
00023 //
00025 
00026 #ifndef _REGEX_H
00027 #define _REGEX_H
00028 
00029 #include <string>
00030 
00031 #ifndef _PCRE_H
00032 #include "pcre.h"
00033 #endif
00034 
00096 
00097 class RegEx
00098 {
00099    public:
00101       RegEx(const string &regex, int options = 0)
00102       {
00103         const char * error;
00104         int          erroffset;
00105 
00106          re = pcre_compile(regex.c_str(), options, &error, &erroffset, NULL);
00107          if (re == NULL) 
00108            throw error;
00109          pe = pcre_study(re, 0, &error);
00110          pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount);
00111          substrcount++;
00112          ovector = new int[3*substrcount];
00113          matchlist = NULL;
00114       };
00115 
00117       RegEx (const RegEx &y)
00118       {
00119         clone(y);
00120       };
00121 
00123       RegEx& operator=(const RegEx &y)
00124       {
00125         if (this != &y) {
00126           ClearMatchList();
00127           delete ovector;
00128           if (pe) pcre_free(pe);
00129           pcre_free(re);
00130           clone(y);
00131         }
00132         return (*this);
00133       };
00134 
00136       ~RegEx()
00137       {
00138          ClearMatchList();
00139          delete ovector;
00140          if (pe) pcre_free(pe);
00141          pcre_free(re);
00142       }
00143 
00145       inline int SubStrings(void) const
00146       {
00147          return substrcount;
00148       }
00149 
00151       bool Search(const char * subject, int len = -1, int options = 0)
00152       {
00153          ClearMatchList();
00154          return pcre_exec(re, pe, lastsubject = subject, slen = (len >= 0) ? len : strlen(subject), 0, options, ovector, 3*substrcount) > 0;
00155       }
00156 
00158       bool SearchAgain(int options = 0)
00159       {
00160          ClearMatchList();
00161          return pcre_exec(re, pe, lastsubject, slen, ovector[1], options, ovector, 3*substrcount) > 0;
00162       }
00163 
00165       string Match(int i = 1)
00166       {
00167          if (i < 0)
00168             return lastsubject;
00169          if (matchlist == NULL)
00170             pcre_get_substring_list(lastsubject, ovector, substrcount, &matchlist);
00171 
00172          string mch (matchlist[i], strlen(matchlist[i]));
00173          return mch;
00174       }
00175 
00176    private:
00177 
00179       void clone(const RegEx &y) {
00180         size_t size;
00181         
00182         if ( !y.re ) return;
00183 
00184         pcre_fullinfo(y.re, 0, PCRE_INFO_SIZE, &size);
00185         re = (pcre *) pcre_malloc(size);
00186         if ( !re ) throw "not enough memory";
00187         memcpy(re, y.re, size);
00188 
00189         pcre_fullinfo(y.re, y.pe, PCRE_INFO_STUDYSIZE, &size);
00190         if (size>0) {
00191           pe = (pcre_extra *) pcre_malloc(size + sizeof(pcre_extra)); 
00192           if ( !pe ) throw "not enough memory";
00193           memcpy(pe, y.pe, size+sizeof(pcre_extra));
00194           // study_data needs reset
00195           void * study = (void *)((char *)pe + sizeof(pcre_extra));
00196           pe->study_data = study;
00197         } 
00198         else
00199           pe = NULL;
00200 
00201         substrcount = y.substrcount;
00202         ovector = new int[3*substrcount];
00203         matchlist = NULL; 
00204      }
00205 
00207       inline void ClearMatchList(void)
00208       {
00209          if (matchlist)
00210             pcre_free_substring_list(matchlist),
00211             matchlist = NULL;
00212       }
00213 
00214       pcre * re;
00215       pcre_extra * pe;
00216       int substrcount;
00217       int * ovector;
00218       const char * lastsubject;
00219       int slen;
00220       const char * * matchlist;
00221 };
00222 
00223 
00224 #endif // _REGEX_H

Generated on Wed Apr 26 12:55:30 2006 for FreeLing by  doxygen 1.4.4