Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members

dataset.h

Go to the documentation of this file.
00001 
00002 //
00003 //    FreeLing - Open Source Language Analyzers
00004 //
00005 //    Copyright (C) 2004   TALP Research Center
00006 //                         Universitat Politecnica de Catalunya
00007 //
00008 //    This library is free software; you can redistribute it and/or
00009 //    modify it under the terms of the GNU Lesser General Public
00010 //    License as published by the Free Software Foundation; either
00011 //    version 2.1 of the License, or (at your option) any later version.
00012 //
00013 //    This library is distributed in the hope that it will be useful,
00014 //    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 //    Lesser General Public License for more details.
00017 //
00018 //    You should have received a copy of the GNU Lesser General Public
00019 //    License along with this library; if not, write to the Free Software
00020 //    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00021 //
00022 //    contact: Lluis Padro (padro@lsi.upc.es)
00023 //             TALP Research Center
00024 //             despatx C6.212 - Campus Nord UPC
00025 //             08034 Barcelona.  SPAIN
00026 //
00028 
00029 
00030 #ifndef _DATASET
00031 #define _DATASET
00032 
00033 #include <iostream>
00034 #include "example.h"
00035 
00036 using namespace std;
00037 
00038 
00039 /********************************************************************************/
00040 /*                                                                              */
00041 /*  dataset:  dataset of binary examples                                      */
00042 /*                                                                              */
00043 /********************************************************************************/
00044 
00045 class dataset 
00046 {
00047  private:
00048   
00049   struct mlDatasetNode {
00050     example* ex;          // pointer to the example
00051     mlDatasetNode* next;    // next example, globally
00052     
00053     mlDatasetNode() : 
00054       ex(NULL), next(NULL) 
00055     {}
00056     mlDatasetNode(example* e) 
00057       : ex(e), next(NULL)
00058     {}
00059   };
00060   
00061     
00062   mlDatasetNode* first;
00063   mlDatasetNode* last;
00064   int  _size;
00065   int *_sizes;
00066   int  _dimension;
00067   int  _nlabels;
00068 
00069 public:
00070 
00071   class iterator {
00072     private :
00073       mlDatasetNode* n;
00074 
00075     public:
00076     iterator() 
00077       : n(NULL)
00078       {}
00079 
00080     iterator(mlDatasetNode *n0) 
00081       : n(n0)
00082       {}
00083 
00084     // ~iterator() { cerr << "dataset iterator destroyed\n"; }
00085     
00086     example* operator->() const { return n->ex; }
00087 
00088     example& operator*() const { return *(n->ex); }
00089     
00090     iterator& operator++() { n = n->next; return *this; }
00091     iterator operator++(int) { iterator tmp(this->n); n = n->next; return tmp; }
00092     
00093     bool operator==(const iterator& rhs) { return n == rhs.n; }
00094     bool operator!=(const iterator& rhs) { return n != rhs.n; }
00095     
00096   };
00097 
00098 
00099   dataset(int nlabels);
00100   ~dataset();
00101 
00102   void delete_examples();
00103 
00104 
00105   // input
00106   void read_stream(istream&);
00107   void add_example(example*, mlDatasetNode* = NULL); 
00108 
00109   // consultores
00110   int size() const { return _size; }
00111   int negative_size(int l) const { return _sizes[2*l]; }
00112   int positive_size(int l) const { return _sizes[2*l+1]; }
00113   int nlabels() const { return _nlabels; }
00114   int dimension() const { return _dimension; }
00115   
00116   // recorregut
00117   iterator begin() const { iterator p(first);  return p; }
00118   iterator end() const { iterator p(NULL); return p; }
00119   
00120   void     split(int feature, dataset *ds0, dataset *ds1, bool create_ds_nodes);
00121   
00122   void print(ostream&) const;
00123   void print_sizes(ostream&) const;
00124 
00125 };
00126 
00127 #endif
00128 
00129 
00130 
00131 
00132 
00133 
00134 
00135 
00136 
00137 
00138 
00139 
00140 
00141 
00142 
00143 
00144 
00145 
00146 

Generated on Wed Apr 26 12:55:30 2006 for FreeLing by  doxygen 1.4.4