카테고리 없음
python replit request 설정
devie
2022. 7. 20. 17:38
import requests
from bs4 import BeautifulSoup
# Fetch the Indeed search results for "python" (50 results per page) and work
# out the highest page number listed in the page's pagination element.
# The timeout keeps the script from hanging forever on a stalled connection.
arra = requests.get(
    'https://www.indeed.com/jobs?q=python&limit=50',
    timeout=10,
)
# Printing the Response object shows the HTTP status; the HTML is in arra.text.
print(arra)
# Stop on an HTTP error (4xx/5xx) instead of parsing an error page.
arra.raise_for_status()

soup = BeautifulSoup(arra.text, 'html.parser')

# Other lookups tried while exploring the parsed document:
# print(soup.title)
# print(soup.title.name)
# print(soup.a)
# print(soup.p['class'])
# soup.find_all(class_="accessibilityMenu")  # class_ because class is reserved
# print(soup.find(id="skipToJobs"))

pagination = soup.find('div', {'class': "pagination"})
# soup.find returns None when nothing matches; fall back to "no links" rather
# than failing with an AttributeError on None.
links = pagination.find_all('a') if pagination is not None else []

# The final anchor is skipped because it is presumably the non-numeric "next"
# link (verify against the live markup). The original dropped the last entry
# twice (links[:-1] and then pages[:-1]), which also discarded the last real
# page number, so only the single trim is kept here.
pages = [int(link.string) for link in links[:-1]]

# With no numbered links, treat the results as a single page.
max_page = pages[-1] if pages else 1
Packages
-> requests ("Python HTTP for Humans.") 를 검색 -> install
https://requests.readthedocs.io/en/latest/
import requests
# Download the whole HTML page; the timeout prevents hanging on a dead
# connection.
arra = requests.get(
    'https://www.indeed.com/jobs?q=python&limit=50',
    timeout=10,
)
# Printing the Response object shows the HTTP status; the HTML is in arra.text.
print(arra)
-> beautifulsoup4 -> install
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
>> html_doc은 위에서 requests가 가져옴
from bs4 import BeautifulSoup
# The html_doc placeholder from the Beautiful Soup docs is the HTML string
# fetched by requests above, i.e. arra.text.
soup = BeautifulSoup(arra.text, 'html.parser')