# Beautiful Soup example
from bs4 import BeautifulSoup
import requests
# Fetch the Sina news page and print, for every nested <section> that has an
# <h2> headline: the title, image URL, link, and <cite> text.
# The placeholder string 'null' is printed for any field that is missing.

# The User-Agent below identifies the client as a mobile Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.100 Mobile Safari/537.36"
    ),
}
url = 'https://news.sina.cn/gn/?from=wap'

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing an HTTP error page as news.
web_data = requests.get(url, headers=headers, timeout=10)
web_data.raise_for_status()
web_data.encoding = 'utf-8'
soup = BeautifulSoup(web_data.text, 'lxml')

for news in soup.select('section section'):
    # Sections without an <h2> are skipped entirely.
    heading = news.select_one('h2')
    if heading is None:
        continue
    title = heading.text

    # The image URL is read from the 'data-src' attribute. A protocol-relative
    # value ('//host/path') gets the scheme prepended; any other value is kept
    # as-is so an already-absolute URL is not corrupted.
    img_tag = news.select_one('img')
    img_src = img_tag.get('data-src') if img_tag is not None else None
    if not img_src:
        image = 'null'
    elif img_src.startswith('//'):
        image = 'https:' + img_src
    else:
        image = img_src

    # A section with no <a>, or an <a> with no href, yields 'null' instead of
    # raising IndexError/KeyError and aborting the whole run.
    anchor = news.select_one('a')
    link = anchor.get('href', 'null') if anchor is not None else 'null'

    # <cite> text; NOTE(review): presumably the news source name — confirm.
    cite_tag = news.select_one('cite')
    form_info = cite_tag.text if cite_tag is not None else 'null'

    print(title, image, link, form_info)