/*
 PLSI: Probabilistic Latent Semantic Indexing Tool

 $Id: matrix.cpp,v 1.6 2003/03/27 08:23:57 taku-ku Exp $;

 Copyright (C) 2002 Taku Kudo  All rights reserved.
 This is free software with ABSOLUTELY NO WARRANTY.

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA
*/
#include "matrix.h"
#include "common.h"
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace PLSI {

  // Parse one text line of whitespace-separated "index:value" tokens into a
  // freshly allocated feature_node array.
  //
  // The array has one slot per ':' found in the line plus a terminating
  // sentinel whose index is -1 and whose value is 0.  The caller owns the
  // returned array and must release it with delete [].
  //
  // Any std::exception raised while parsing (e.g. allocation failure) is
  // re-thrown as std::runtime_error carrying the same message.
  feature_node *str2feature_node (const char *s)
  {
    try {
      const unsigned int len = static_cast<unsigned int>(std::strlen (s));

      // Every ':' introduces one "index:value" pair.
      unsigned int elmnum = 0;
      for (unsigned int i = 0; i < len; i++) {
        if (s[i] == ':') elmnum++;
      }

      feature_node *_x = new feature_node[elmnum + 1];

      unsigned int j = 0;
      for (unsigned int i = 0; j < elmnum && i < len;) {
        // <cctype> classifiers require a value representable as unsigned
        // char; a plain (possibly negative) char is undefined behaviour.
        while (i < len && std::isspace (static_cast<unsigned char>(s[i]))) i++;
        _x[j].index = std::atoi (s + i);

        // Advance to the separator; the value starts right after it.
        while (i + 1 < len && s[i] != ':') i++;
        _x[j].value = std::atof (s + i + 1);
        j++;

        // Skip the remainder of this token.
        while (i < len && !std::isspace (static_cast<unsigned char>(s[i]))) i++;
      }

      // dummy index: sentinel marking the end of the row
      _x[j].index = -1;
      _x[j].value = 0;

      return _x;
    }

    catch (const std::exception &e) {
      throw std::runtime_error (e.what ());
    }
  }

  // Release every row and the row table, then reset the matrix to empty.
  //
  // The rows must be freed BEFORE row is reset; resetting the counter first
  // makes the loop a no-op and leaks every row.
  // NOTE(review): this assumes each matrix[i] was allocated with new [] and
  // is owned exclusively by this object (as read() does) -- confirm no other
  // code path stores borrowed pointers in matrix.
  void Matrix::clear ()
  {
    if (matrix) {
      for (unsigned int i = 0; i < row; i++) delete [] matrix[i];
      delete [] matrix;
    }
    matrix = 0;
    row = col = 0;
  }

  // Load a sparse matrix from a text file, one row per line.
  //
  // Empty lines and lines starting with '#' are skipped.  Every other line
  // is parsed with str2feature_node().  col becomes (largest index + 1) and
  // row the number of parsed lines.  After loading, every column index in
  // [0, col) must occur in at least one row.
  //
  // Returns true on success.  On any failure the matrix is cleared and a
  // std::runtime_error of the form "Matrix::read(): <reason> [<filename>]"
  // is thrown.
  bool Matrix::read (const char *filename)
  {
    try {
      clear ();

      std::ifstream is (filename);
      if (! is) throw std::runtime_error ("cannot open ");

      std::string line;
      unsigned int i = 0;
      while (std::getline (is, line)) {
        if (line.empty () || line[0] == '#') continue;

        feature_node *fn = str2feature_node (line.c_str ());
        for (unsigned int j = 0; fn[j].index != -1; j++) {
          // A negative index would wrap to a huge column count and later
          // index out of bounds; reject it up front.
          if (fn[j].index < 0) {
            delete [] fn;
            throw std::runtime_error ("negative feature index is found ");
          }
          col = _max (col, static_cast<unsigned int>(fn[j].index) + 1);
        }

        matrix = _append (matrix, i, fn, (feature_node *)0);
        i++;
        // Keep row current so that clear() in the error path below can
        // free the rows appended so far.
        row = i;
      }

      // Count how often each column index is used.  The vector is
      // zero-initialised and released automatically on every exit path.
      std::vector<unsigned int> tmp (col, 0);
      for (unsigned int r = 0; r < row; r++) {
        feature_node *fn = this->getFeatureNode (r);
        for (unsigned int j = 0; fn[j].index != -1; j++) {
          tmp[fn[j].index]++;
        }
      }

      for (unsigned int c = 0; c < col; c++) {
        if (tmp[c] == 0) {
          std::ostringstream ss;
          ss << "empty feature index (" << c << ") is found ";
          throw std::runtime_error (ss.str ());
        }
      }

      return true;
    }

    catch (const std::exception &e) {
      clear ();
      std::ostringstream ss;
      ss << "Matrix::read(): " << e.what () << " [" << filename << "]";
      throw std::runtime_error (ss.str ());
    }
  }
}