-
Scrape remoteok.io | Project using python / Jobs scrapper | 2020. 12. 21. 11:53
scrapperRemote.py
"""Scrape job listings from remoteok.io.

The public entry point is ``get_RemoteJobs(word)``; ``get_WWRJobs`` is kept
as an alias because that was the name this module originally exported.
"""

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Base used to turn the hrefs found in the job table into absolute URLs.
BASE_URL = "https://remoteok.io/"

# Seconds to wait for the site before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Browser-like User-Agent sent with every request.
# NOTE(review): presumably the site rejects the default requests UA -- confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
}


def _text(node):
    """Return ``node.string``, or "" when the node is missing or has no string."""
    if node is None:
        return ""
    return node.string or ""


def extract_job(html):
    """Extract title, company, location and link from one job row.

    Args:
        html: A parsed ``<tr>`` element (anything exposing ``find_all``).
            The first ``<td>`` is expected to hold the job link and the
            second one the title / company / location markup.

    Returns:
        A dict with the keys ``title``, ``company``, ``location`` and
        ``link``. Any piece that cannot be found is returned as "" (for
        ``link`` that means the bare site URL) instead of raising.
    """
    tds = html.find_all("td")
    link_cell = tds[0] if tds else None
    info_cell = tds[1] if len(tds) > 1 else None

    # Look the anchor up step by step: indexing ["href"] on a missing anchor
    # (or an anchor without href) would raise before any fallback could run.
    anchor = link_cell.find("a") if link_cell is not None else None
    link = (anchor.get("href") if anchor is not None else None) or ""

    if info_cell is None:
        title = company = location = ""
    else:
        title = _text(info_cell.find("h2"))
        company_anchor = info_cell.find("a")
        company = (
            _text(company_anchor.find("h3")) if company_anchor is not None else ""
        )
        location = _text(info_cell.find("div", {"class": "location"}))

    return {
        "title": title,
        "company": company,
        "location": location,
        # urljoin copes with both "/path" and "path" hrefs, so a
        # root-relative href no longer yields "https://remoteok.io//path".
        "link": urljoin(BASE_URL, link),
    }


def extract_jobs(url):
    """Download ``url`` and return one job dict per row of the jobs table.

    Args:
        url: Address of a remoteok.io listing page.

    Returns:
        A list of dicts as produced by ``extract_job``; an empty list when
        the page contains no ``<table id="jobsboard">``.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, "html.parser")

    board = soup.find("table", {"id": "jobsboard"})
    if board is None:
        # Layout changed or the request was blocked: nothing to extract.
        return []

    jobs = []
    for row in board.find_all("tr", {"class": "job"}):
        job = extract_job(row)
        print(job)  # progress output kept from the original implementation
        jobs.append(job)
    return jobs


def get_RemoteJobs(word):
    """Return the remoteok.io jobs listed for the search term ``word``."""
    url = f"https://remoteok.io/remote-dev+{word}-jobs"
    return extract_jobs(url)


# Backward-compatible alias: the function used to be exported under this
# name, while scrapperJobs.py imports ``get_RemoteJobs`` from this module.
get_WWRJobs = get_RemoteJobs
scrapperJobs.py
from scrapperSO import get_SOjobs
from scrapperWWR import get_WWRJobs
from scrapperRemote import get_RemoteJobs


def get_jobs(word):
    """Collect the jobs for ``word`` from all three scrapers.

    Each scraper is called once with ``word``, in the order Stack Overflow,
    WeWorkRemotely, Remote; the three results are then concatenated with
    ``+`` in that same order and returned.
    """
    stack_overflow = get_SOjobs(word)
    we_work_remotely = get_WWRJobs(word)
    remote = get_RemoteJobs(word)
    return stack_overflow + we_work_remotely + remote
get_jobs 함수를 만든다. 인자로 word(= 검색할 프로그래밍 언어)를 받아 Stack Overflow, WeWorkRemotely, Remote 세 스크래퍼를 차례로 호출하고, 각 결과를 하나의 리스트로 합쳐 반환한다.
참고 자료
소스 코드
github.com/zpskek/web_scraper-v2/commit/00727a3ceb119b74a7ab1fd381cea92c907fc51c
'Project using python > Jobs scrapper' 카테고리의 다른 글
search.html (0) 2020.12.21 home.html with Flask framework (0) 2020.12.21 Scrap WeWorkRemotely (0) 2020.12.21 Extract jobs from Stack Overflow (0) 2020.12.21 get_last_page of Stack Overflow (0) 2020.12.21