/* PLSI: Probabilistic Latent Semantic Indexing Tool $Id: plsi.h,v 1.8 2003/03/27 08:23:57 taku-ku Exp $; Copyright (C) 2002 Taku Kudo All rights reserved. This is free software with ABSOLUTELY NO WARRANTY. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #ifndef PLSI_PLSI_H #define PLSI_PLSI_H #include #include #include "matrix.h" #include "config.h" namespace PLSI { class PLSI { private: unsigned int D; // size of Documents unsigned int W; // size of terms unsigned int Z; // size of latent class double R; // sum of all weight double **new_pwz; double **new_pdz; double *new_pz; double **old_pwz; double **old_pdz; double *old_pz; double beta; Matrix matrix; double pzdw_norm (unsigned int id, unsigned int iw) { double sum = 0.0; for (unsigned int jz = 0; jz < Z; ++jz) sum += (old_pz[jz] * pow(old_pdz[id][jz] * old_pwz[iw][jz], beta)); return sum; } double pzdw (unsigned int iz, unsigned int id, unsigned int iw, double sum) { return sum == 0 ? 0.0 : old_pz[iz] * pow(old_pdz[id][iz] * old_pwz[iw][iz], beta)/sum; } double pdw (unsigned int id, unsigned int iw) { double sum = 0.0; for (unsigned int iz = 0; iz < Z; ++iz) sum += (old_pz[iz] * old_pdz[id][iz] * old_pwz[iw][iz]); return sum; } double gain (); double init (const char *filename = 0); public: bool learn (unsigned int, double, double, unsigned int, const char*); void clear (); bool write (const char *, unsigned int); bool read (const char *); PLSI (): D(0), W(0), Z(0), R(0.0), new_pwz(0), new_pdz(0), new_pz(0), old_pwz(0), old_pdz(0), old_pz(0), beta(0.0) {}; ~PLSI () { clear (); } }; } #endif