额,还在尝试,socket编程还没写,以及函数没封装,问题很多。。。
先写了个获取小说目录的,顺便学一哈regex,结果只会用了(.*)以及regex_search。。
/******************version 0.0.1***************/
// name: getChapterAddress.cpp
// using: get the address of chapter from catalogue of novel
// author:amos
// time:2018/5/4
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <regex>
#include <set>
#include <string>

/// Extracts chapter addresses from a saved catalogue page.
///
/// Reads the catalogue HTML line by line, finds every chapter link that
/// follows a `class="novel_num"` marker, and writes the link target (without
/// the trailing ".html") to the output file, one address per line. Each
/// address is written only once, in order of first appearance.
///
/// @return 0 on success, EXIT_FAILURE if either file cannot be opened.
int main()
{
    std::ifstream fp("/home/calco/桌面/novel.txt");
    std::ofstream ft("/home/calco/桌面/url.txt");
    if (!fp.is_open()) {
        std::perror("File open is failed");
        return EXIT_FAILURE;
    }
    if (!ft.is_open()) {
        std::perror("urltxt open is failed");
        return EXIT_FAILURE;
    }

    // Compiled once instead of once per input line. The capture is limited to
    // the inside of the href attribute so that several links on one line are
    // matched separately, and the dot in ".html" is matched literally.
    const std::regex chapterLink(R"re(class="novel_num"></div><a href="([^"]+)\.html)re");

    std::set<std::string> seen;
    std::string line;
    // std::getline on a std::string has no length limit; a fixed-size char
    // buffer would put the stream into a failed state on the first long line
    // and end the loop early.
    while (std::getline(fp, line)) {
        const std::sregex_iterator last;
        for (std::sregex_iterator it(line.begin(), line.end(), chapterLink); it != last; ++it) {
            const std::string address = (*it)[1].str();
            // insert().second is true only for an address not seen before.
            if (seen.insert(address).second) {
                ft << address << '\n';
            }
        }
    }

    // Both streams are closed by their destructors.
    return 0;
}
大概就是这样的html
处理完之后就是
/*********************************************************************************************/
然后写了对小说内容的获取,去除了 <br /> 这类标签,以及 &nbsp;
// name: getChapterContent.cpp
// using: get the content of novel
// author:amos
// time:2018/5/4
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <regex>
#include <string>

namespace {

/// Replaces every occurrence of `from` in `text` with `to`, left to right.
/// Text inserted by a replacement is not rescanned.
void replaceAll(std::string& text, const std::string& from, const std::string& to)
{
    if (from.empty()) {
        return;
    }
    std::string::size_type pos = 0;
    // The assignment is parenthesised on its own so that `pos` receives the
    // index returned by find(), not the boolean result of the comparison.
    while ((pos = text.find(from, pos)) != std::string::npos) {
        text.replace(pos, from.size(), to);
        pos += to.size();
    }
}

/// Returns `text` without leading and trailing whitespace.
std::string trim(const std::string& text)
{
    static const char* const kWhitespace = " \t\n\v\f\r";
    const std::string::size_type first = text.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = text.find_last_not_of(kWhitespace);
    return text.substr(first, last - first + 1);
}

}  // namespace

/// Replaces every HTML non-breaking-space entity ("&nbsp;") in `A` with a
/// single space and returns the result.
///
/// The version published on the blog searched for a token that had lost its
/// leading '&' and stored the result of a comparison in the search position,
/// which corrupted the start of the string. The extra "sp;" clean-up pass only
/// existed to hide that corruption and is no longer needed.
std::string HandleString1(const std::string A)
{
    std::string result = A;
    replaceAll(result, "&nbsp;", " ");
    return result;
}

/// Cleans one line of chapter text: converts "&nbsp;" entities to spaces (via
/// HandleString1) and replaces each line-break tag with a single space.
///
/// The tag forms `<br>`, `<br/>`, `<br />` and `</br>` are recognised,
/// case-insensitively. Any other '<' character is left untouched.
std::string HandleString(std::string A)
{
    static const std::regex brTag(R"re(</?br\s*/?>)re", std::regex::icase);
    return std::regex_replace(HandleString1(A), brTag, " ");
}

/// Extracts the chapter title and body text from a saved chapter page.
///
/// Every `novel_title">...</h` match is written (trimmed) as a title line.
/// After a line containing `novel_content">` has been seen, each following
/// line is cleaned with HandleString and written, until the first line that
/// contains `</div`, which ends processing.
///
/// @return 0 on success, EXIT_FAILURE if either file cannot be opened.
int main()
{
    std::ifstream in("/home/calco/桌面/chapter.txt");
    std::ofstream out("/home/calco/桌面/chapterContent.txt");
    if (!in.is_open()) {
        std::perror("File chapter open is failed");
        return EXIT_FAILURE;  // previously a bare expression, so execution continued
    }
    if (!out.is_open()) {
        std::perror("File content open is failed");
        return EXIT_FAILURE;
    }

    // Patterns are compiled once rather than once per input line.
    const std::regex titlePattern(R"re(novel_title">(.*?)</h)re");
    const std::regex contentStart(R"re(novel_content">)re");
    const std::regex contentEnd("</div");

    bool inContent = false;
    std::string line;
    // std::getline on a std::string has no length limit; a fixed-size char
    // buffer would stop the loop at the first line longer than the buffer.
    while (std::getline(in, line)) {
        std::string rest = line;
        std::smatch match;
        // Emit the chapter title(s) found on this line.
        while (std::regex_search(rest, match, titlePattern)) {
            out << trim(match[1].str()) << '\n';
            rest = match.suffix().str();
        }

        if (inContent) {
            if (std::regex_search(rest, contentEnd)) {
                break;  // closing </div> marks the end of the chapter body
            }
            out << HandleString(rest) << '\n';
        } else if (std::regex_search(rest, contentStart)) {
            // Body text starts on the line after the marker.
            inContent = true;
        }
    }

    // Both streams are closed by their destructors.
    return 0;
}
这个没用regex_replace,用的find和replace,结果问题很大,以后再改吧