Python之爬虫-段子网


#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
import requests

def extract_ishuo_jokes(html):
    """Return the text inside every ``<div class="content">`` element of *html*.

    Args:
        html: Page markup as a ``str``.

    Returns:
        A list with one string per match, in document order; empty when
        nothing matches.
    """
    # Non-greedy capture so each match stops at the first closing </div>.
    return re.findall(r'<div class="content">(.*?)</div>', html)


def scrape_ishuo(url='https://ishuo.cn', timeout=10):
    """Download *url*, print the raw HTML, then print each extracted joke.

    Args:
        url: Page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            it ``requests.get`` may block indefinitely.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of scraping an error page.
    response.raise_for_status()
    # requests falls back to ISO-8859-1 when a text response declares no
    # charset; in that case use the encoding detected from the body so
    # non-Latin text is decoded correctly.
    if response.encoding and response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding
    data = response.text
    # Raw page dump kept from the original script.
    print(data)
    for joke in extract_ishuo_jokes(data):
        print(joke)


if __name__ == '__main__':
    scrape_ishuo()

http://duanziwang.com/

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
import requests

def extract_duanziwang_titles(html):
    """Return the link text of every duanziwang.com ``.html`` anchor in *html*.

    Only anchors whose ``href`` starts with ``http://duanziwang.com/`` and
    ends with ``.html`` are matched.

    Args:
        html: Page markup as a ``str``.

    Returns:
        A list with one string per matching anchor, in document order;
        empty when nothing matches.
    """
    # Dots are escaped so they match a literal '.', and the path part is
    # limited to non-quote characters so a match cannot run past the end of
    # the href attribute into a later tag.
    pattern = r'<a href="http://duanziwang\.com/[^"]*?\.html">(.*?)</a>'
    return re.findall(pattern, html)


def scrape_duanziwang(url='http://duanziwang.com/', timeout=10):
    """Download *url* and print the text of each matching article link.

    Args:
        url: Page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            it ``requests.get`` may block indefinitely.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of scraping an error page.
    response.raise_for_status()
    # requests falls back to ISO-8859-1 when a text response declares no
    # charset; in that case use the encoding detected from the body so
    # non-Latin text is decoded correctly.
    if response.encoding and response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding
    data = response.text
    for title in extract_duanziwang_titles(data):
        print(title)


if __name__ == '__main__':
    scrape_duanziwang()