# coding=utf-8  

import lxml, bs4, re, requests


# Not used by the script itself; kept so the module still exposes the name.
csvContent = ''

# Index article whose links point at every post of the series.
INDEX_URL = "http://mp.weixin.qq.com/s/u_WmkE5meMWuZ81G5gHhBQ"
# Only links starting with this prefix are downloaded.
ARTICLE_URL_PREFIX = 'http://mp.weixin.qq.com'
# Series name that is removed from each title before it becomes a file name.
SERIES_NAME = "计算机程序的思维逻辑"
# Destination folder (Windows path); it must already exist.
OUTPUT_DIR = 'D:\\Java编程的逻辑\\'
# Seconds to wait for each HTTP request, so a stalled server cannot hang
# the script.
REQUEST_TIMEOUT = 30

# Characters deleted from a title in a single pass.
_PUNCTUATION_TO_DROP = str.maketrans("", "", "():")


def clean_title(title: str) -> str:
    """Turn an article title into the base name used for the saved file.

    The series name and every ``(``, ``)`` and ``:`` are removed, the
    result is stripped of surrounding whitespace, and then a single
    trailing ``/`` is dropped (no further stripping happens after that).

    Args:
        title: Title text taken from the article page.

    Returns:
        The cleaned title; may be empty if nothing is left.
    """
    cleaned = title.replace(SERIES_NAME, "")
    cleaned = cleaned.translate(_PUNCTUATION_TO_DROP).strip()
    if cleaned.endswith("/"):
        cleaned = cleaned[:-1]
    return cleaned


def save_article(url):
    """Download one article and write its HTML to ``OUTPUT_DIR``.

    The file is named after the cleaned text of the first element whose
    class is ``rich_media_title``. The cleaned title is printed as a
    progress indicator before the file is written.

    Args:
        url: Address of the article page.

    Returns:
        The path of the written file, or ``None`` when the page has no
        title element (the page is skipped and a message is printed).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    page = bs4.BeautifulSoup(response.text, 'html.parser')

    heading = page.find(attrs={'class': 'rich_media_title'})
    if heading is None:
        # Not an article page (or its layout changed): skip it instead of
        # aborting the whole run.
        print('skipped, no title found: ' + url)
        return None

    # get_text() also works when the title element contains nested tags,
    # where ``.string`` would be None.
    title = clean_title(heading.get_text().strip())
    print(title)

    file_name = OUTPUT_DIR + title + '.html'
    with open(file_name, 'w', encoding='utf-8') as out_file:
        out_file.write(response.text)
    return file_name


def main():
    """Fetch the index page and save every linked series article."""
    index_page = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
    soup = bs4.BeautifulSoup(index_page.text, 'html.parser')

    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href attribute yield None; ignore them.
        if href and href.startswith(ARTICLE_URL_PREFIX):
            save_article(href)


if __name__ == "__main__":
    main()


运行后的效果如下：

爬网页_html