#include #include #include #include #include #include #include #define LOG_FILE "ime.log" namespace { template inline size_t tokenize(char *str, const char *del, Iterator out, size_t max) { char *stre = str + std::strlen(str); const char *dele = del + std::strlen(del); size_t size = 0; while (size < max) { char *n = std::find_first_of(str, stre, del, dele); *n = '\0'; *out++ = str; ++size; if (n == stre) break; str = n + 1; } return size; } void decodeURI(const char *src, std::string &dest) { dest.clear(); while (*src != '\0') { if (*src == '%') { char h = std::toupper(src[1]); char l = std::toupper(src[2]); int vh = std::isalpha(h) ?(10 +(h -'A')) :(h -'0'); int vl = std::isalpha(l) ?(10 +(l -'A')) :(l -'0'); dest += ((vh << 4) + vl); src += 3; } else if (*src == '+') { dest += ' '; src++; } else { dest += *src++; } } } class CGIParam { private: std::map conf_; public: CGIParam() {} virtual ~CGIParam() {} bool clear() { conf_.clear(); } bool parse(const char *key) { clear(); char buf[1024]; std::strncpy(buf, key, sizeof(buf)); char *params[128]; size_t n = tokenize(buf, "&", params, sizeof(params)); for (size_t i = 0; i < n; ++i) { char *pairs[2]; size_t c = tokenize(params[i], "=", pairs, sizeof(pairs)); if (c != 2) continue; std::string value; decodeURI(pairs[1], value); conf_.insert(std::make_pair(pairs[0], value)); } return true; } const std::string getString(const char* key) { std::map::iterator it = conf_.find(key); if (it == conf_.end()) return std::string(""); return it->second; } int getInt(const char* key) { std::map::iterator it = conf_.find(key); if (it == conf_.end()) return 0; return std::atoi(it->second.c_str()); } }; void IMELog(CGIParam ¶m) { const char *remote_addr = std::getenv("REMOTE_ADDR"); if (! remote_addr) remote_addr = ""; const char *referer = std::getenv("HTTP_REFERER"); if (! referer) referer = ""; time_t t = time(0); char *date_local = ctime(&t); date_local[strlen(date_local)-1] = '\0'; std::string query = param.getString("query"); std::string result = param.getString("result"); int sel = param.getInt("sel"); int id = param.getInt("id"); std::ofstream ofs(LOG_FILE, std::ios::app); ofs << remote_addr << " " << "[" << date_local << "] " << query << '\t' << result << '\t' << sel << '\t' << id << '\t' << referer << std::endl; ofs.close(); return; } void IMEConvert(CGIParam ¶m) { std::string query = param.getString("query"); std::string to = param.getString("to"); std::string func = param.getString("func"); if (func.empty()) func = "ImeRequestCallback"; size_t maxsize = 0; std::string arg = "-l1 -Oyomi -d./dic/"; if (to == "alpha") { arg += "alpha"; maxsize = 10; } else if (to == "katakana") { arg += "katakana"; maxsize = 2; } else { arg += "imedic"; maxsize = 10; } MeCab::Tagger *tagger = MeCab::createTagger(arg.c_str()); if (!tagger) { std::cerr << MeCab::getTaggerError() << std::endl; return; } tagger->parseNBestInit(query.c_str()); std::set candhash; std::vector cand; for (size_t n = 0; n < 500; ++n) { const char *result = tagger->next(); if (!result) break; if (candhash.find(result) == candhash.end()) { cand.push_back(result); candhash.insert(result); if (cand.size() == maxsize) break; } } std::cout << "Content-Type: text/javascript; charset=\"UTF-8\";\n\n"; std::cout << func << "(["; for (size_t i = 0; i < cand.size(); ++i) { if (i != 0) std::cout << ","; std::cout << "'"; std::string &str = cand[i]; for (size_t k = 0; k < str.size(); ++k) { if (str[k] == '\'') std::cout << "\\"; std::cout << str[k]; } std::cout << "'"; } std::cout << "]);" << std::endl; return; } } int main(int argc, char **argv) { char *q = getenv("QUERY_STRING"); if (!q || strlen(q) == 0) return 0; CGIParam param; param.parse(q); std::string action = param.getString("action"); if (action == "conv") IMEConvert(param); else if (action == "log") IMELog(param); return 0; }