# Parse the downloaded HTML. The parser is named explicitly so the resulting
# tree does not depend on which optional parsers (lxml, html5lib) happen to be
# installed, and so bs4 does not emit its "no parser specified" warning.
soup = BeautifulSoup(res.text, 'html.parser')
# Print all human-readable text in the document, with the markup stripped.
print(soup.text)
將網頁中的文字取出
# Show the list of the document's top-level child nodes.
print(soup.contents)
soup.contents 是文件最上層子節點所組成的 list
# select() requires a CSS selector argument; calling it with no arguments
# raises TypeError, so only the call with a selector is kept.
# It returns a list of every element matching the selector.
print(soup.select('html'))
select('html') 會將所有符合的 html 元素存到 list 中；但 html 通常只有一個，所以可以用 [0] 取出第一個元素：
# select() returns a list; index 0 picks the first matched <html> element.
print(soup.select('html')[0])
這樣取出的就是單一的 Tag 物件（而不是 list），印出來會是該元素的 HTML 字串
# List every <a> element found in the document.
print(soup.select('a'))
以 id 選取：選擇器以 # 開頭，例如 soup.select('#main')
以 class 選取：選擇器以 . 開頭，例如 soup.select('.thread')
# Print the subject of every element with class "thread".
for item in soup.select('.thread'):
    # Search inside the current thread (item), not the whole document (soup);
    # querying soup would print the page's first subject on every iteration.
    subjects = item.select('.subject')
    if subjects:  # skip threads without a .subject element instead of crashing
        print(subjects[0].text)
這裡容易出錯：在迴圈中應該對 item（而不是 soup）呼叫 select，而且 select(...)[0] 在找不到元素時會拋出 IndexError
# For each element with class "item", print the text of its first <strong>
# child together with the text of its first ".title" child.
for item in soup.select('.item'):
    strong_text = item.select('strong')[0].text
    title_text = item.select('.title')[0].text
    print(strong_text, title_text)
import re

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'}

# Download the page. The headers are passed explicitly (the original built
# them but never sent them), and a timeout keeps the script from hanging
# forever if the server does not respond.
res = requests.get(
    'https://www.stockdog.com.tw/stockdog/index.php?m=0&sid=',
    headers=headers,
    timeout=10,
)

# Parse with an explicitly named parser so the result is the same on every
# machine and bs4 does not warn about guessing one.
soup = BeautifulSoup(res.text, 'html.parser')
沒有留言:
張貼留言