Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members

adaboost.h

Go to the documentation of this file.
00001 
00002 //
00003 //    FreeLing - Open Source Language Analyzers
00004 //
00005 //    Copyright (C) 2004   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This library is free software; you can redistribute it and/or
00009 //    modify it under the terms of the GNU Lesser General Public
00010 //    License as published by the Free Software Foundation; either
00011 //    version 2.1 of the License, or (at your option) any later version.
00012 //
00013 //    This library is distributed in the hope that it will be useful,
00014 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 //    Lesser General Public License for more details.
00017 //
00018 //    You should have received a copy of the GNU Lesser General Public
00019 //    License along with this library; if not, write to the Free Software
00020 //    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00021 //
00022 //    contact: Lluis Padro (padro@lsi.upc.es)
00023 //             TALP Research Center
00024 //             despatx C6.212 - Campus Nord UPC
00025 //             08034 Barcelona.  SPAIN
00026 //
00028 
00029 
00030 /*****************************************************************/
00031 /*                                                               */
00032 /*  Class AdaBoostMH                                             */
00033 /*                                                               */
00034 /*****************************************************************/
00035 
00036 #ifndef _ADABOOST
00037 #define _ADABOOST
00038 
00039 #include "dataset.h"
00040 #include <fstream>
00041 #include <iostream>
00042 #include <string>
00043 #include <vector>
00044 
00045 // forward declaration: mlABTree is defined below, after class adaboost
00046 class mlABTree; 
00047 // Boosting classifier: keeps a linked list of mlABTree weak rules plus the label names, and declares learning, classification and model I/O methods.
00048 class adaboost {
00049 private:
00050   // class-wide parameters (static members, shared by every adaboost object)
00051   static double epsilon;
00052   static int    verbose;
00053   static bool   option_initialize_weights;
00054 
00055   // node of the weak-rule list: a pointer to one rule plus a link to the next node
00056   struct wr_holder {
00057     mlABTree *rule;
00058     wr_holder  *next;
00059   };
00060 
00061 
00062   // weak rules linked list (nodes are chained through wr_holder::next)
00063   wr_holder  *first;
00064   wr_holder  *last;
00065   wr_holder  *pcl_pointer; // partial classification pointer
00066   int         nrules;    // NOTE(review): presumably the number of weak rules in the list -- confirm
00067   int         nlabels;   // value returned by get_nlabels()
00068   vector<string> labels;  // label names, loaded from model file
00069   string         label_others;  // value returned by default_class()
00070 
00071   // output stream (NOTE(review): presumably assigned by set_output -- confirm in the implementation)
00072   ofstream    *out;
00073 
00074   // stopping criterion
00075   struct {
00076     int  n_rounds;
00077     int  max_depth;
00078   } SC;
00079 
00080   // auxiliary learning functions (declared only; their bodies are not in this header)
00081   int  stopping_criterion(int nrounds);
00082   void initialize_weights(dataset *ds);
00083   void update_weights(mlABTree *wr, double Z, dataset *ds);
00084   void add_weak_rule(mlABTree *wr);
00085 
00086   // copy constructor forbidden: it is declared in the private section
00087   adaboost(const adaboost &old_bab); 
00088 
00089 public:
00090   // constructors, destructor and access methods
00091   adaboost(int nl);
00092   adaboost(const string &file);
00093   ~adaboost();
00094   int n_rules();
00095   // inline accessors
00096   int get_nlabels() {return nlabels;}
00097   string get_label(int lb) {return labels[lb];}  // lb is not range-checked here
00098   string default_class() {return label_others;}
00099 
00100   // classification methods
00101   // Important: pred is an array of predictions, one for each label
00102   //            the function *assigns* its prediction for each label
00103   void classify(input *i,  double pred[]);
00104 
00105   // partial classification
00106   void pcl_ini_pointer();
00107   int  pcl_advance_pointer(int steps);
00108   // Important: pred is an array of predictions, one for each label
00109   //            the function *adds* its prediction for each label
00110   void pcl_classify(input *i, double *pred, int nrules);
00111 
00112   // learning methods
00113   void learn(dataset *ds, int nrounds, int maxdepth);
00114 
00115 
00116   // I/O methods
00117   void set_output(ofstream &os);
00118   void read_from_stream(ifstream &in);
00119   void read_from_file(char* file);
00120   // static configuration methods; their names mirror the private static parameters above
00121   static void set_verbose(int level);
00122   static void set_epsilon(double eps);
00123   static void set_initialize_weights(bool b);
00124 };
00125 
00126 
00127 
00128 /*****************************************************************/
00129 /*                                                               */
00130 /*  Class mlABTree                                               */
00131 /*                                                               */
00132 /*****************************************************************/
00133 
00134 // Binary tree node; adaboost stores pointers to these as its weak rules (wr_holder::rule).
00135 class mlABTree {
00136 
00137 private:
00138   // binary tree structure
00139   int         feature;        // 0 when leaf
00140   mlABTree  **sons;           // when not a leaf 
00141   double     *predictions;    // when leaf  (array of predictions, one for each class)
00142 
00143   // learning parameters (static members, shared by every mlABTree object)
00144   static int       nlabels;
00145   static double    epsilon;
00146   static int       max_depth;
00147   static int      *used_features; 
00148   static int       verbose;
00149 
00150   // auxiliary learning functions (declared only; their bodies are not in this header)
00151   static mlABTree* learn_0(dataset *ds, double *Z, int depth);
00152   static int       stopping_criterion(dataset *ds, int depth);
00153   // W is W[2][nlabels][2]
00154   static int       best_feature(dataset *ds, double *W);
00155 
00156   // W is W[ndim][nlabels][2]
00157   static double    Zcalculus(double *W, int ndim);
00158   // W is W[v][nlabels][2]; result is result[nlabels][2]
00159   static void      Cprediction(int v, double *W, double result[]);
00160 
00161   // copy constructor forbidden: it is declared in the private section
00162   mlABTree(const mlABTree &wr0);
00163 
00164 public:
00165   // class parameters (static setters)
00166   static void set_nlabels(int nl);
00167   static void set_verbose(int level);
00168   static void set_epsilon(double eps);
00169 
00170   // Constructors and destructor
00171 
00172   //  p0 is an array of predictions, one for each label
00173   mlABTree(double *p0);  // NOTE(review): presumably builds a leaf -- confirm in the implementation
00174   mlABTree(int f, mlABTree *wrFalse, mlABTree *wrTrue);  // NOTE(review): presumably builds an internal node on feature f -- confirm
00175   ~mlABTree();
00176 
00177   // Classification
00178   // Important: pred is an array of predictions, one for each label
00179   //            the function *adds* its prediction for each label
00180   void classify(input *i,double *pred);
00181 
00182   //  I/O operations
00183   void print(char *carry);
00184   void write_to_stream(ofstream &os);
00185   static mlABTree* read_from_stream(istream &is);
00186 
00187   // learning
00188   static mlABTree* learn(dataset *ds, double *Z, int max_depth0);
00189 };
00190 
00191 
00192 
00193 #endif 

Generated on Wed Apr 26 12:55:30 2006 for FreeLing by  doxygen 1.4.4