From 4da829531da908589236862166dddbc1348ed3db Mon Sep 17 00:00:00 2001 From: martin-martin Date: Tue, 1 Oct 2024 16:52:14 -0400 Subject: [PATCH 1/2] Add updated resources --- web-scraping-bs4/README.md | 36 ++++++++++++++++++- web-scraping-bs4/requirements.txt | 14 ++++---- .../{scrape_jobs.py => scraper.py} | 16 ++++----- 3 files changed, 49 insertions(+), 17 deletions(-) rename web-scraping-bs4/{scrape_jobs.py => scraper.py} (57%) diff --git a/web-scraping-bs4/README.md b/web-scraping-bs4/README.md index f89a71e00e..29aea61735 100644 --- a/web-scraping-bs4/README.md +++ b/web-scraping-bs4/README.md @@ -1,3 +1,37 @@ # Build a Web Scraper With Requests and Beautiful Soup -This repository contains [`scrape_jobs.py`](https://github.com/realpython/materials/blob/master/web-scraping-bs4/scrape_jobs.py), which is the sample script built in the Real Python tutorial on how to [Build a Web Scraper With Requests and Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/). +This repository contains `scraper.py`, which is the sample script built in the Real Python tutorial on how to [Build a Web Scraper With Requests and Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/). + +## Installation and Setup + +1. Create a Python virtual environment + +```sh +$ python -m venv venv/ +$ source venv/bin/activate +(venv) $ +``` + +2. Install the requirements + +```sh +(venv) $ pip install -r requirements.txt +``` + +## Run the Scraper + +Run the scraper script: + +```sh +(venv) $ python scraper.py +``` + +You'll see the filtered and formatted Python job listings from the Fake Python job board printed to your console. + +## About the Author + +Martin Breuss - Email: martin@realpython.com + +## License + +Distributed under the MIT license. See ``LICENSE`` for more information. diff --git a/web-scraping-bs4/requirements.txt b/web-scraping-bs4/requirements.txt index b7a5bb209c..c26a496adf 100644 --- a/web-scraping-bs4/requirements.txt +++ b/web-scraping-bs4/requirements.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.9.3 -certifi==2020.12.5 -chardet==4.0.0 -idna==2.10 -requests==2.25.1 -soupsieve==2.2.1 -urllib3==1.26.4 +beautifulsoup4==4.12.3 +certifi==2024.8.30 +charset-normalizer==3.3.2 +idna==3.10 +requests==2.32.3 +soupsieve==2.6 +urllib3==2.2.3 diff --git a/web-scraping-bs4/scrape_jobs.py b/web-scraping-bs4/scraper.py similarity index 57% rename from web-scraping-bs4/scrape_jobs.py rename to web-scraping-bs4/scraper.py index b50dd04e56..8f7b0d5159 100644 --- a/web-scraping-bs4/scrape_jobs.py +++ b/web-scraping-bs4/scraper.py @@ -7,22 +7,20 @@ soup = BeautifulSoup(page.content, "html.parser") results = soup.find(id="ResultsContainer") -# Look for Python jobs -print("PYTHON JOBS\n==============================\n") python_jobs = results.find_all( "h2", string=lambda text: "python" in text.lower() ) -python_job_elements = [ + +python_job_cards = [ h2_element.parent.parent.parent for h2_element in python_jobs ] -for job_element in python_job_elements: - title_element = job_element.find("h2", class_="title") - company_element = job_element.find("h3", class_="company") - location_element = job_element.find("p", class_="location") +for job_card in python_job_cards: + title_element = job_card.find("h2", class_="title") + company_element = job_card.find("h3", class_="company") + location_element = job_card.find("p", class_="location") print(title_element.text.strip()) print(company_element.text.strip()) print(location_element.text.strip()) - link_url = job_element.find_all("a")[1]["href"] + link_url = job_card.find_all("a")[1]["href"] print(f"Apply here: {link_url}\n") - print() From 1b1311ded0a3af742712b1fc223ea8a84aac86c9 Mon Sep 17 00:00:00 2001 From: brendaweles <160772586+brendaweles@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:11:19 -0600 Subject: [PATCH 2/2] Language Edit --- web-scraping-bs4/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web-scraping-bs4/README.md b/web-scraping-bs4/README.md index 29aea61735..f64a2ce475 100644 --- a/web-scraping-bs4/README.md +++ b/web-scraping-bs4/README.md @@ -1,6 +1,6 @@ -# Build a Web Scraper With Requests and Beautiful Soup +# Beautiful Soup: Build a Web Scraper With Python -This repository contains `scraper.py`, which is the sample script built in the Real Python tutorial on how to [Build a Web Scraper With Requests and Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/). +This repository contains `scraper.py`, which is the sample script built in the Real Python tutorial [Beautiful Soup: Build a Web Scraper With Python](https://realpython.com/beautiful-soup-web-scraper-python/). ## Installation and Setup