//效果截图如下(文章后面附有VS2008本工程下载地址):

VC获取网页标题,解决乱码问题_CHttpFile

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

VC获取网页标题,解决乱码问题_获取网页标题_02

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

VC获取网页标题,解决乱码问题_CInternetSession_03

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

主要代码如下:

//取网页标题
void CGetWebTitleDlg::OnBnClickedBtnGetTitle()
{
m_HtmlCode.SetWindowText(_T(""));//clear

CInternetSession mySession(NULL,0);
CHttpFile* htmlFile=NULL;
CString strLine,url,strHtml;
TCHAR sRecv[1024];
UINT CodePage=65001;//CP_UTF8:65001 CP_ACP:0
m_Url.GetWindowText(url);

TRY
{
htmlFile=(CHttpFile*)mySession.OpenURL(url);//打开连接

//获取网页编码
while(htmlFile->ReadString(sRecv,1024))
{
//先用UTF8来进行转换,如果html页面编码是gbk或gb2312,转换后中文字符为
//乱码,但英文字符显示正常,我们判断html页码编码,通过寻找英文就可以了
int nBufferSize = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1, NULL, 0);
wchar_t *pBuffer = new wchar_t[nBufferSize+1];
memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t));
MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t));
strHtml=pBuffer;
if (-1!=strHtml.Find(_T("charset=gbk")))
{
CodePage=0;
delete pBuffer;
break;
}
if (-1!=strHtml.Find(_T("charset=GBK")))//http://www.sohu.com
{
CodePage=0;
delete pBuffer;
break;
}
if (-1!=strHtml.Find(_T("charset=gb2312")))
{
CodePage=0;
delete pBuffer;
break;
}
if (-1!=strHtml.Find(_T("charset=GB2312")))
{
CodePage=0;
delete pBuffer;
break;
}
if (-1!=strHtml.Find(_T("charset=utf-8")))
{
CodePage=65001;
delete pBuffer;
break;
}
if (-1!=strHtml.Find(_T("charset=UTF-8")))
{
CodePage=65001;
delete pBuffer;
break;
}
delete pBuffer;
}
strHtml=_T("");

//获取网页源码
htmlFile=(CHttpFile*)mySession.OpenURL(url);//重新打开连接
while(htmlFile->ReadString(sRecv,1024))
{

// 编码转换,可解决中文乱码问题
//gb2312转为unicode,则用CP_ACP
//gbk转为unicode,也用CP_ACP
//utf-8转为unicode,则用CP_UTF8
int nBufferSize = MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1, NULL, 0);

wchar_t *pBuffer = new wchar_t[nBufferSize+1];
memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t));

//gb2312转为unicode,则用CP_ACP
//gbk转为unicode,也用CP_ACP
//utf-8转为unicode,则用CP_UTF8
MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t));

strHtml+=pBuffer;
strHtml+="\r\n";
delete pBuffer;
}
htmlFile->Close();
mySession.Close() ;
delete htmlFile;

m_HtmlCode.SetWindowText(strHtml);//显示网页源码

//获取网页标题
CString szTitle=strHtml.GetString();
int nStart=szTitle.Find(_T("<title>"));
int nEnd=szTitle.Find(_T("</title>"));
szTitle=szTitle.Mid(nStart+7,nEnd-nStart-7);
this->SetWindowText(_T("获取到的网页标题为【")+szTitle+_T("】 By︶风不冷丶"));

}
CATCH (CException, e)
{
TCHAR err[1024];
e->GetErrorMessage(err,1024);
m_HtmlCode.SetWindowText(err);
}
END_CATCH
}



--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

本例子,VS2008工程下载地址

友情提醒:直接在VS2008中运行程序时,会获取不到网页标题,错误信息为:无法解析服务器的名称或地址

这个问题的原因我目前还不清楚,知道的朋友望告知一二,不胜感激。不过到生成程序的目录下直接运行程序,就不会出现以上错误了。

在VS2008直接运行工程获取网页标题,错误截图如下:

VC获取网页标题,解决乱码问题_CHttpFile_04

-------------------------------------------------------------------------------------------------------------

​​

您的十分满意是我追求的宗旨。

您的一点建议是我后续的动力。