// // MeCab WebService Server // // Copyright(C) 2005-2007 Taku Kudo // #include #include #include #include #include #include "httpd.h" namespace tiny_http_server { static const char *kLogFile = "mecapi.log"; static const int kPort = 10523; static const int kPreForkNum = 2; void escape_xml(const std::string &input, std::string *output) { output->clear(); for (size_t i = 0; i < input.size(); ++i) { switch(input[i]) { case '<': *output += "<"; break; case '>': *output += ">"; break; case '&': *output += "&"; break; case '\'': *output += "'"; break; default: *output += input[i]; break; } } } void write_xml(std::ostream *os, const std::vector > &results) { std::string w; std::string feature; *os << ""; for (size_t i = 0; i < results.size(); ++i) { escape_xml(results[i].first, &w); escape_xml(results[i].second, &feature); *os << "" << w << "" << feature << ""; } *os << ""; } void escape_json(const std::string &input, std::string *output) { output->clear(); for (size_t i = 0; i < input.size(); ++i) { if (input[i] == '"') { *output += '\\'; } *output += input[i]; } } void write_json(std::ostream *os, const std::string func, const std::vector > &results) { std::string w; std::string feature; *os << func << "(["; for (size_t i = 0; i < results.size(); ++i) { escape_json(results[i].first, &w); escape_json(results[i].second, &feature); *os << "{\"surface\":\"" << w << "\",\"feature\":\"" << feature << "\"}"; if (i != results.size() - 1) *os << ","; } *os << "]);"; } class MecapiWorker: public HTTPWorker { private: MeCab::Tagger *tagger_; std::string message_; void handle_request() { std::string sentence = cgi_param("sentence"); if (sentence.empty()) { sentence = "index.html"; output_header() << "Content-Language: ja\r\n"; output_header() << "Content-Type: text/html; charset=\"UTF-8\";\r\n"; output_body() << message_; send_request("200 OK"); } else { const MeCab::Node *node = tagger_->parseToNode(sentence.c_str(), sentence.size()); if (!node) { bad_request(); return; } std::vector > results; node = node->next; bool is_noun = (cgi_param("filter") == "noun"); static const char *noun_pos = "名詞,"; for (; node && node->stat != MECAB_EOS_NODE; node = node->next) { std::string w(node->surface, node->length); if (is_noun && std::strncmp(noun_pos, node->feature, std::strlen(noun_pos)) != 0) continue; results.push_back(std::make_pair(w, std::string(node->feature))); } std::string format = cgi_param("format"); if (format == "xml" || format.empty()) { format = "xml"; output_body() << ""; write_xml(&output_body(), results); } else if (format == "json") { std::string func = cgi_param("func"); if (func.empty()) func = "func"; write_json(&output_body(), func, results); } else if (format == "text") { for (size_t i = 0; i < results.size(); ++i) { output_body() << results[i].first << '\t' << results[i].second << std::endl; } output_body() << "EOS" << std::endl; } output_header() << "Content-Language: ja\r\n"; output_header() << "Content-Type: "; if (format == "xml") { output_header() << "text/xml"; } else if (format == "json") { output_header() << "text/javascript"; } else { output_header() << "text/plain"; } output_header() << "; charset=\"UTF-8\";\r\n"; send_request("200 OK"); } { time_t t = time(0); char *date_local = ctime(&t); date_local[strlen(date_local)-1] = '\0'; std::string addr = input_header("X-Forwarded-For:"); if (addr.empty()) addr = client_address(); std::ofstream ofs(kLogFile, std::ios::app); ofs << addr << " " << "[" << date_local << "] " << sentence << '\t' << input_header("Referer:") << std::endl; } return; } const char *server_name() const { return "Yet_Another_MECAPI_Server/1.1"; } public: MecapiWorker(): tagger_(0) { tagger_ = MeCab::createTagger("-d ipadic"); if (!tagger_) { std::cerr << MeCab::getTaggerError() << std::endl; std::exit(-1); } std::ifstream ifs("./mecapi_index.html"); if (ifs) { std::string line; while (std::getline(ifs, line)) { message_ += line; message_ += "\n"; } } } virtual ~MecapiWorker() { delete tagger_; } }; } #define OPT " -p port -t prefork_num [-h]" int main(int argc, char **argv) { extern char *optarg; unsigned short port = tiny_http_server::kPort; size_t prefork_num = tiny_http_server::kPreForkNum; std::string dir; int opt; while ((opt = getopt(argc, argv, "t:p:hc:")) != -1) { switch(opt) { case 't': prefork_num = atoi(optarg); break; case 'p': port = atoi(optarg); break; case 'c': dir = std::string(optarg); break; case 'h': default: std::cout << "Usage: " << argv[0] << OPT << std::endl; return -1; } } if (!dir.empty()) chdir(dir.c_str()); tiny_http_server::HTTPWorkerCreatorTempl creator; tiny_http_server::PreForkHTPPServer server(&creator); // main loop return server.run(port, prefork_num); }