// 每个吧 第一页 (for each Tieba forum: the first page)
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace ConsoleApplication8
{
class Program
{
public static List<MSG> lmsg = new List<MSG>();
/// <summary>Domain and path every request cookie below is scoped to.</summary>
private const string CookieDomain = ".tieba.baidu.com";
private const string CookiePath = "/";

/// <summary>
/// Downloads <paramref name="url"/> with the base cookie set and returns the
/// response body decoded as UTF-8. Called by get_one for a forum index page.
/// </summary>
/// <param name="url">Absolute URL on tieba.baidu.com.</param>
/// <returns>The full response body.</returns>
/// <exception cref="WebException">The request failed or returned an error status.</exception>
static string GetMsg(string url)
{
    return Download(url, BuildCookies(false, false,
        "19636_20023_19685_1436_12897_17948_19570_19805_19558_19808_19842_17001_15294_11963"));
}

/// <summary>
/// Same as <see cref="GetMsg"/> but additionally sends the
/// "userFromPsNeedShowTab" cookie. Called by get_one for individual thread pages.
/// </summary>
/// <param name="url">Absolute URL on tieba.baidu.com.</param>
/// <returns>The full response body.</returns>
/// <exception cref="WebException">The request failed or returned an error status.</exception>
static string GetMsg1(string url)
{
    return Download(url, BuildCookies(true, false,
        "19636_20023_19685_1436_12897_17948_19570_19805_19558_19808_19842_17001_15294_11963"));
}

/// <summary>
/// Same as <see cref="GetMsg"/> but sends the "BDRCVFR[6bMcVWaYtEt]" cookie and a
/// shorter H_PS_PSSID value. Called by get_sex for the user panel endpoint.
/// </summary>
/// <param name="url">Absolute URL on tieba.baidu.com.</param>
/// <returns>The full response body.</returns>
/// <exception cref="WebException">The request failed or returned an error status.</exception>
static string GetMsg2(string url)
{
    return Download(url, BuildCookies(false, true, "17948"));
}

/// <summary>
/// Builds the cookie jar shared by all three download entry points, keeping the
/// cookies in the order each entry point has always sent them.
/// </summary>
/// <param name="showTabFlag">Prepend the "userFromPsNeedShowTab" cookie.</param>
/// <param name="includeBdrcvfr">Insert the "BDRCVFR[6bMcVWaYtEt]" cookie before H_PS_PSSID.</param>
/// <param name="hPsPssid">Value for the H_PS_PSSID cookie.</param>
private static CookieContainer BuildCookies(bool showTabFlag, bool includeBdrcvfr, string hPsPssid)
{
    // NOTE(review): these look like values captured from a real browser session.
    // Hard-coded session identifiers expire and should not live in source control;
    // consider loading them from configuration.
    CookieContainer jar = new CookieContainer();
    if (showTabFlag)
    {
        AddCookie(jar, "userFromPsNeedShowTab", "1");
    }
    AddCookie(jar, "TIEBA_USERTYPE", "3b218806e82b3aa0fd2ae7dc");
    AddCookie(jar, "bdshare_firstime", "1461572415344");
    AddCookie(jar, "TIEBAUID", "ab202a741c5a937664dafb46");
    AddCookie(jar, "BAIDUID", "AA783FFD52E477A7C8D25BBBFA49313D:FG=1");
    AddCookie(jar, "PSTM", "1462440736");
    AddCookie(jar, "BIDUPSID", "878F94DD4B53B4C29C2452AC7256AD78");
    AddCookie(jar, "LONGID", "1759463301");
    if (includeBdrcvfr)
    {
        AddCookie(jar, "BDRCVFR[6bMcVWaYtEt]", "mk3SLVN4HKm");
    }
    AddCookie(jar, "H_PS_PSSID", hPsPssid);
    AddCookie(jar, "wise_device", "0");
    return jar;
}

/// <summary>Adds one cookie scoped to the Tieba domain and root path.</summary>
private static void AddCookie(CookieContainer jar, string name, string value)
{
    jar.Add(new Cookie(name, value, CookiePath, CookieDomain));
}

/// <summary>
/// Issues a GET for <paramref name="url"/> with the given cookies and returns the
/// body as a UTF-8 string. The response, its stream and the reader are disposed
/// even when reading throws, so a failed download does not leak a connection.
/// </summary>
private static string Download(string url, CookieContainer cookies)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "GET";
    request.Host = "tieba.baidu.com";
    request.KeepAlive = true;
    request.CookieContainer = cookies;
    // Kept from the original request shape even though a GET carries no body.
    request.ContentType = "application/x-www-form-urlencoded";

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Extracts thread URLs from a forum index page.
/// </summary>
/// <remarks>
/// The page is cut at every " j_thread_list clearfix" marker. In each piece after
/// the first, the text between the first ':' and the first ',' is taken as the
/// thread id and appended to "http://tieba.baidu.com/p/". Pieces whose leading
/// segment contains no ':' are skipped instead of throwing.
/// </remarks>
/// <param name="ss">HTML of the forum index page; null or empty yields an empty list.</param>
/// <returns>One URL per recognised thread entry, in page order.</returns>
static List<string> GetURL(string ss)
{
    List<string> urllist = new List<string>();
    if (string.IsNullOrEmpty(ss))
    {
        return urllist;
    }

    // The marker has no regex metacharacters, so a plain string split is equivalent.
    string[] pieces = ss.Split(new[] { " j_thread_list clearfix" }, StringSplitOptions.None);

    // pieces[0] is everything before the first thread entry.
    foreach (string piece in pieces.Skip(1))
    {
        string[] idParts = piece.Split(',')[0].Split(':');
        if (idParts.Length < 2)
        {
            continue; // malformed entry: no "key:value" before the first comma
        }
        urllist.Add("http://tieba.baidu.com/p/" + idParts[1]);
    }
    return urllist;
}
/// <summary>
/// Entry point: runs get_one for every forum index URL in the list below, in
/// parallel. Results accumulate in the static lmsg list; nothing in this method
/// prints or persists them.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    List<string> url = new List<string>
    {
        "http://tieba.baidu.com/f?ie=utf-8&kw=bigbang&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=exo&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%B0%91%E5%A5%B3%E6%97%B6%E4%BB%A3&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=tfboys&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E7%8E%8B%E5%87%AF&fr=search", // Wang Kai forum
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%9C%8D%E5%BB%BA%E5%8D%8E&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%BD%AD%E4%BA%8E%E6%99%8F&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%8E%E6%98%93%E5%B3%B0&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%A8%E6%B4%8B&fr=search",
        "http://tieba.baidu.com/f?kw=maroon5&ie=utf-8",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%82%93%E7%B4%AB%E6%A3%8B&fr=search", // G.E.M. (Deng Ziqi) forum
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%91%A8%E6%9D%B0%E4%BC%A6&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=T-ara&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=2NE1&fr=search", // 2NE1 forum
        "http://tieba.baidu.com/f?ie=utf-8&kw=4minute&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=CNBLUE&fr=search", // CNBLUE
        "http://tieba.baidu.com/f?ie=utf-8&kw=WINNER&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Apink&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=AOA&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%BC%A0%E6%83%A0%E5%A6%B9&fr=search", // A-Mei (Zhang Huimei)
        "http://tieba.baidu.com/f?ie=utf-8&kw=girlsday",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%98%B2%E5%BC%B9%E5%B0%91%E5%B9%B4%E5%9B%A2&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%99%88%E5%A5%95%E8%BF%85&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9E%97%E4%BF%8A%E6%9D%B0&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%BC%A0%E9%9D%93%E9%A2%96&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%A2%81%E9%9D%99%E8%8C%B9&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E4%BA%94%E6%9C%88%E5%A4%A9&fr=search", // Mayday
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%8B%8F%E6%89%93%E7%BB%BF&fr=search", // Sodagreen
        "http://tieba.baidu.com/f?ie=utf-8&kw=justinbieber",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Eminem&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=brunomars",
        "http://tieba.baidu.com/f?ie=utf-8&kw=samsmith",
        "http://tieba.baidu.com/f?ie=utf-8&kw=clarkson",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Rihanna&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%87%AF%E8%92%82%E6%B4%BE%E7%91%9E",
        "http://tieba.baidu.com/f?ie=utf-8&kw=hebe",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Taylor%20Swift&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Ariana%20Grande&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Adele&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=jessiej",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%8E%E8%8D%A3%E6%B5%A9&fr=search", // Li Ronghao
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%88%98%E4%BA%A6%E8%8F%B2&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%AD%99%E7%87%95%E5%A7%BF&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%8C%83%E5%86%B0%E5%86%B0&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%83%A1%E6%AD%8C&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%AE%8B%E4%BD%B3&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%B5%B5%E4%B8%BD%E9%A2%96&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%99%88%E5%9D%A4&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%B4%BE%E4%B9%83%E4%BA%AE&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%BB%84%E8%87%B4%E5%88%97&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%88%98%E6%B6%9B&fr=search", // Liu Tao
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%92%8B%E6%AC%A3&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%9D%B3%E4%B8%9C&fr=search",
        // Same URL as an earlier entry in this list; Distinct() below drops the repeat.
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%8E%E6%98%93%E5%B3%B0&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%AD%99%E4%BF%AA&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%AB%98%E5%9C%86%E5%9C%86&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%96%9B%E4%B9%8B%E8%B0%A6&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E7%8E%8B%E5%AD%90%E6%96%87&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E4%BD%95%E7%82%85&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%82%93%E8%B6%85&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%B9%BF%E6%99%97&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=angelababy",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%B0%A2%E5%A8%9C&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%BC%A0%E8%89%BA%E5%85%B4&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%B2%B3%E4%BA%91%E9%B9%8F&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%90%B4%E4%BA%A6%E5%87%A1&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E7%8E%8B%E5%98%89%E5%B0%94&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%B5%B5%E8%96%87&fr=search", // Zhao Wei
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E7%8E%8B%E5%8A%9B%E5%AE%8F&fr=search", // Wang Leehom
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%BF%AA%E4%B8%BD%E7%83%AD%E5%B7%B4&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E7%8E%8B%E7%A5%96%E8%93%9D&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%8E%E5%86%B0%E5%86%B0&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%AE%8B%E8%8C%9C&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9E%97%E6%9B%B4%E6%96%B0",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E4%BA%95%E6%9F%8F%E7%84%B6&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%80%AA%E5%A6%AE&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%A7%9A%E6%99%A8&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%83%AD%E7%A2%A7%E5%A9%B7&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%88%98%E8%AF%97%E8%AF%97&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%88%98%E7%83%A8&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%8E%E9%8D%BE%E7%A2%A9&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%87%91%E9%92%9F%E5%9B%BD&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%99%88%E6%99%93&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9D%8E%E5%85%89%E6%B4%99&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%B0%8FS&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%88%92%E6%B7%87&fr=search", // Shu Qi
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9C%B4%E6%B5%B7%E9%95%87&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%BC%B5%E6%99%BA%E9%9C%96&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=jessica",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%99%B3%E6%9F%8F%E9%9C%96&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%94%A1%E5%BA%B7%E6%B0%B8&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%A2%81%E5%92%8F%E4%BB%AA&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%94%A1%E5%8D%93%E5%A6%8D&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E9%8D%BE%E6%AC%A3%E6%BD%BC&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E8%94%A1%E4%BE%9D%E6%9E%97&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=Gary&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%86%AF%E7%BB%8D%E5%B3%B0&fr=search", // Feng Shaofeng
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E5%AD%99%E7%BA%A2%E9%9B%B7",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E7%BE%85%E5%BF%97%E7%A5%A5&fr=search",
        "http://tieba.baidu.com/f?ie=utf-8&kw=%E6%9E%97%E5%BF%83%E5%A6%82&fr=search", // Ruby Lin (Lin Xinru)
    };

    // Run the forums in parallel. Original author's note: roughly 4x faster on a
    // 4-core machine, about 30 minutes for the whole list.
    // Distinct() keeps a repeated URL from being crawled twice.
    url.Distinct().AsParallel().ForAll(s =>
    {
        try
        {
            get_one(s);
        }
        catch (Exception ex)
        {
            // One unreachable or unparsable forum should not abort the whole
            // run; report it and let the remaining workers continue.
            Console.Error.WriteLine("Failed to process " + s + ": " + ex.Message);
        }
    });
}
/// <summary>Serialises appends to lmsg; Main calls get_one from parallel workers.</summary>
private static readonly object s_lmsgGate = new object();

/// <summary>
/// Processes one forum: downloads its index page, downloads every thread linked
/// from it, picks the thread whose get_time value is the latest, extracts the
/// author and content fields from that thread and appends the result to lmsg.
/// </summary>
/// <remarks>
/// Nothing is appended when the index page yields no thread links.
/// </remarks>
/// <param name="url">URL of the forum index page.</param>
static void get_one(string url)
{
    string forumHtml = GetMsg(url);               // index page of one forum
    List<string> threadUrls = GetURL(forumHtml);  // links to the threads listed on it

    // Track only the newest thread seen so far instead of buffering every page.
    DateTime newestTime = DateTime.MinValue;
    string newestHtml = null;
    foreach (string threadUrl in threadUrls)
    {
        string threadHtml = GetMsg1(threadUrl);
        DateTime postedAt = get_time(threadHtml);
        if (DateTime.Compare(newestTime, postedAt) < 0)
        {
            newestTime = postedAt;
            newestHtml = threadHtml;
        }
    }

    if (newestHtml == null)
    {
        // No thread was selected; the extractors below would fail on empty input.
        return;
    }

    MSG no1 = new MSG();
    no1.time = newestTime;
    no1.avatar_url = get_avatar_url(newestHtml);
    no1.nickname = get_nickname(newestHtml);
    // The nickname goes into a query string, so escape characters such as
    // '&', '+', '#' or spaces that would otherwise change the query.
    no1.sex = get_sex("http://tieba.baidu.com/home/get/panel?ie=utf-8&un=" + Uri.EscapeDataString(no1.nickname));
    no1.msg = get_msg(newestHtml);
    no1.img_list = get_img_list(newestHtml);

    // List<T> is not safe for concurrent writers.
    lock (s_lmsgGate)
    {
        lmsg.Add(no1);
    }
}
/// <summary>
/// Reads the post timestamp from a thread page.
/// </summary>
/// <remarks>
/// Looks in the text following the first
/// "l_post j_l_post l_post_bright noborder " marker for a "date" key and parses
/// its value with the invariant culture.
/// </remarks>
/// <param name="ss">HTML of a thread page.</param>
/// <returns>The parsed timestamp.</returns>
/// <exception cref="FormatException">
/// The post marker or the date field is missing, or the value is not a date.
/// </exception>
static DateTime get_time(string ss)
{
    string[] aroundPost = Regex.Split(ss, "l_post j_l_post l_post_bright noborder ");
    if (aroundPost.Length < 2)
    {
        throw new FormatException("Post marker not found in thread page.");
    }

    // The key/value quotes may appear either as the HTML entity &quot; (JSON
    // embedded in an attribute) or as a plain '"'; accept both. The value runs
    // up to the next '&' or '"'.
    // NOTE(review): the entity form is assumed to be what the page serves,
    // because the original code cut the value at the first '&' - confirm
    // against a live page.
    Match dateField = Regex.Match(aroundPost[1], "date(?:&quot;|\"):(?:&quot;|\")([^&\"]*)");
    if (!dateField.Success)
    {
        throw new FormatException("Date field not found in thread page.");
    }

    string text = dateField.Groups[1].Value.Trim();
    // Machine-generated timestamp: parse independently of the current culture.
    return DateTime.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Returns the value of the first src="..." attribute that follows the first
/// username=" occurrence in the page.
/// </summary>
/// <param name="ss">HTML of a thread page.</param>
/// <returns>The text between src=" and the next double quote.</returns>
static string get_avatar_url(string ss)
{
    // Text between the first and second username=" (or to the end of the page).
    string afterUsername = Regex.Split(ss, "username=\"")[1];
    // Text after the first src=" inside that slice.
    string afterSrc = Regex.Split(afterUsername, "src=\"")[1];
    // Everything up to the closing quote of the attribute.
    return Regex.Split(afterSrc, "\"")[0];
}
/// <summary>
/// Returns the value of the first username="..." attribute in the page.
/// </summary>
/// <param name="ss">HTML of a thread page.</param>
/// <returns>The text between username=" and the next double quote.</returns>
static string get_nickname(string ss)
{
    string afterUsername = Regex.Split(ss, "username=\"")[1];
    return Regex.Split(afterUsername, "\"")[0];
}
/// <summary>
/// Downloads the given URL via GetMsg2 and returns the value that follows the
/// first sex":" key in the response.
/// </summary>
/// <param name="ss">URL to request (get_one passes the user panel endpoint).</param>
/// <returns>The text between sex":" and the next double quote.</returns>
static string get_sex(string ss)
{
    string panelBody = GetMsg2(ss);
    string afterKey = Regex.Split(panelBody, "sex\":\"")[1];
    return Regex.Split(afterKey, "\"")[0];
}
/// <summary>
/// Extracts the text of the first post body from a thread page.
/// </summary>
/// <remarks>
/// Takes the content between the first
/// <c>d_post_content j_d_post_content clearfix"&gt;</c> marker and the next
/// <c>&lt;/div&gt;</c>, trims it, turns each <c>&lt;br&gt;</c> into the two
/// characters backslash + 'n', and strips <c>&lt;img&gt;</c> and
/// <c>&lt;a&gt;</c> tags while keeping the text inside links.
/// </remarks>
/// <param name="ss">HTML of a thread page.</param>
/// <returns>The post text with image and link tags removed.</returns>
static string get_msg(string ss)
{
    string s = Regex.Split(ss, "d_post_content j_d_post_content clearfix\">")[1];
    s = Regex.Split(s, "</div>")[0].Trim().Replace("<br>", "\\n");

    // Match each tag up to its own closing '>' so that text sitting between two
    // tags is never swallowed and tags of any length are removed.
    s = Regex.Replace(s, "<img\\b[^>]*>", "");
    s = Regex.Replace(s, "<a\\b[^>]*>", "");
    s = Regex.Replace(s, "</a>", "");
    return s;
}
/// <summary>
/// Collects the image URLs embedded in the first post body of a thread page.
/// </summary>
/// <remarks>
/// Searches the content between the first
/// <c>d_post_content j_d_post_content clearfix"&gt;</c> marker and the next
/// <c>&lt;/div&gt;</c> for double-quoted values that start with
/// http://imgsrc.baidu.com and end in 'g' (e.g. .jpg, .png, .jpeg).
/// </remarks>
/// <param name="ss">HTML of a thread page.</param>
/// <returns>The URLs in document order, without surrounding quotes; empty if none.</returns>
static List<string> get_img_list(string ss)
{
    string s = Regex.Split(ss, "d_post_content j_d_post_content clearfix\">")[1];
    s = Regex.Split(s, "</div>")[0];

    // [^"]* cannot cross the closing quote, so each match is exactly one
    // attribute value and adjacent images are never fused into one entry.
    return Regex.Matches(s, "http://imgsrc\\.baidu\\.com[^\"]*g(?=\")")
        .Cast<Match>()
        .Select(m => m.Value)
        .ToList();
}
}
/// <summary>
/// One scraped post: the fields get_one extracts from the thread it selects.
/// </summary>
internal class MSG
{
    /// <summary>Timestamp of the post, as returned by get_time.</summary>
    public DateTime time { get; set; }

    /// <summary>Avatar image URL, as returned by get_avatar_url.</summary>
    public string avatar_url { get; set; }

    /// <summary>Author name taken from the page's username attribute.</summary>
    public string nickname { get; set; }

    /// <summary>Raw value of the "sex" field, as returned by get_sex.</summary>
    public string sex { get; set; }

    /// <summary>Post text with image and link tags stripped, as returned by get_msg.</summary>
    public string msg { get; set; }

    /// <summary>Image URLs found in the post body; null until assigned.</summary>
    public List<string> img_list { get; set; }
}
}
// 最新时间的帖子信息 (information of the most recent post)