方式一,使用requests库

import urllib
import requests
from bs4 import BeautifulSoup  # 第三方包,处理html对象
import re


def search_google(query, timeout=10):
    """Search Google for *query* and extract matching LinkedIn result titles.

    The result page is downloaded with ``requests``, parsed with
    BeautifulSoup, and every result heading is matched against the pattern
    ``<name> - <work> - SES Satellites | LinkedIn`` (case-insensitive).

    Args:
        query: Search string; it is percent-encoded before being appended
            to the search URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A list of ``(name, work, company)`` tuples, one per matching
        heading. Empty when the page has no result container or when no
        heading matches.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote

    # Download the HTML source of the result page.
    url = 'https://google.com/search?q=' + quote(query)
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error response instead of silently parsing an error
    # page and returning an empty list.
    response.raise_for_status()

    # Compile once; the same pattern is applied to every heading.
    title_pattern = re.compile(
        r'(?P<name>.+?) - (?P<work>.+?) - (?P<company>SES Satellites) \| LinkedIn',
        flags=re.I | re.S,
    )

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): the class names below are tied to Google's current
    # result markup and will stop matching if that markup changes.
    container = soup.find('div', attrs={'class': 'v7W49e'})
    if container is None:
        # find() returns None when the container is absent (e.g. a consent
        # or captcha page); there is nothing to extract in that case.
        return []

    results = []
    for heading in container.find_all('h3', attrs={'class': 'LC20lb MBeuO DKV0Md'}):
        title = heading.string
        if title is None:
            # .string is None when the heading has several child nodes.
            continue
        results.extend(title_pattern.findall(str(title)))
    return results


if __name__ == '__main__':
    # Demo run: look up LinkedIn pages that mention "at ses Satellites" and
    # "Current", then print the extracted tuples.
    demo_query = 'site:linkedin.com -inurl:dir "at ses Satellites" "Current"'
    results = search_google(query=demo_query)
    print(results)

方式二,只使用urllib库

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re


def search_google(query, timeout=10):
    """Search Google for *query* using only ``urllib`` for the HTTP request.

    The result page is downloaded with ``urllib.request``, parsed with
    BeautifulSoup, and every result heading is matched against the pattern
    ``<name> - <work> - SES Satellites | LinkedIn`` (case-insensitive).

    Args:
        query: Search string; it is URL-encoded as the ``q`` parameter.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        A list of ``(name, work, company)`` tuples, one per matching
        heading. Empty when the page has no result container or when no
        heading matches.

    Raises:
        OSError: ``urllib.error.URLError`` (including ``HTTPError`` for an
            error status) or a socket timeout.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}

    # Download the HTML source of the result page.
    url = 'https://www.google.com.hk/search?' + urllib.parse.urlencode({'q': query})
    # No ``data`` argument is supplied, so the request is sent as GET;
    # passing ``data`` would turn it into a POST.
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        # Honour the charset declared by the server; fall back to UTF-8
        # when the Content-Type header does not name one.
        charset = response.headers.get_content_charset() or 'utf-8'
        html = response.read().decode(charset, errors='replace')

    # Compile once; the same pattern is applied to every heading.
    title_pattern = re.compile(
        r'(?P<name>.+?) - (?P<work>.+?) - (?P<company>SES Satellites) \| LinkedIn',
        flags=re.I | re.S,
    )

    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): the class names below are tied to Google's current
    # result markup and will stop matching if that markup changes.
    container = soup.find('div', attrs={'class': 'v7W49e'})
    if container is None:
        # find() returns None when the container is absent (e.g. a consent
        # or captcha page); there is nothing to extract in that case.
        return []

    results = []
    for heading in container.find_all('h3', attrs={'class': 'LC20lb MBeuO DKV0Md'}):
        title = heading.string
        if title is None:
            # .string is None when the heading has several child nodes.
            continue
        results.extend(title_pattern.findall(str(title)))
    return results


if __name__ == '__main__':
    # Demo run: look up LinkedIn pages that mention "at ses Satellites" and
    # "Current", then print the extracted tuples.
    demo_query = 'site:linkedin.com -inurl:dir "at ses Satellites" "Current"'
    results = search_google(query=demo_query)
    print(results)