Merge pull request #589 from realpython/bs4-update

Add materials for updated bs4 tutorial
realpython · Oct 11, 2024 · 754c56e · 754c56e
2 parents 98d865b + f02992a
commit 754c56e
Show file tree

Hide file tree

Showing 3 changed files with 50 additions and 18 deletions.
diff --git a/web-scraping-bs4/README.md b/web-scraping-bs4/README.md
@@ -1,3 +1,37 @@
-# Build a Web Scraper With Requests and Beautiful Soup
+# Beautiful Soup: Build a Web Scraper With Python
 
-This repository contains [`scrape_jobs.py`](https://github.com/realpython/materials/blob/master/web-scraping-bs4/scrape_jobs.py), which is the sample script built in the Real Python tutorial on how to [Build a Web Scraper With Requests and Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/).
+This repository contains `scraper.py`, which is the sample script built in the Real Python tutorial [Beautiful Soup: Build a Web Scraper With Python](https://realpython.com/beautiful-soup-web-scraper-python/).
+
+## Installation and Setup
+
+1. Create and activate a Python virtual environment:
+
+```sh
+$ python -m venv venv/
+$ source venv/bin/activate
+(venv) $
+```
+
+2. Install the requirements:
+
+```sh
+(venv) $ pip install -r requirements.txt
+```
+
+## Run the Scraper
+
+Run the scraper script:
+
+```sh
+(venv) $ python scraper.py
+```
+
+You'll see the filtered and formatted Python job listings from the Fake Python job board printed to your console.
+
+## About the Author
+
+Martin Breuss - Email: martin@realpython.com
+
+## License
+
+Distributed under the MIT license. See `LICENSE` for more information.
diff --git a/web-scraping-bs4/requirements.txt b/web-scraping-bs4/requirements.txt
@@ -1,7 +1,7 @@
-beautifulsoup4==4.9.3
-certifi==2020.12.5
-chardet==4.0.0
-idna==2.10
-requests==2.25.1
-soupsieve==2.2.1
-urllib3==1.26.4
+beautifulsoup4==4.12.3
+certifi==2024.8.30
+charset-normalizer==3.3.2
+idna==3.10
+requests==2.32.3
+soupsieve==2.6
+urllib3==2.2.3
diff --git a/web-scraping-bs4/scrape_jobs.py b/web-scraping-bs4/scraper.py
@@ -7,22 +7,20 @@
 soup = BeautifulSoup(page.content, "html.parser")
 results = soup.find(id="ResultsContainer")
 
-# Look for Python jobs
-print("PYTHON JOBS\n==============================\n")
 python_jobs = results.find_all(
     "h2", string=lambda text: "python" in text.lower()
 )
-python_job_elements = [
+
+python_job_cards = [
     h2_element.parent.parent.parent for h2_element in python_jobs
 ]
 
-for job_element in python_job_elements:
-    title_element = job_element.find("h2", class_="title")
-    company_element = job_element.find("h3", class_="company")
-    location_element = job_element.find("p", class_="location")
+for job_card in python_job_cards:
+    title_element = job_card.find("h2", class_="title")
+    company_element = job_card.find("h3", class_="company")
+    location_element = job_card.find("p", class_="location")
     print(title_element.text.strip())
     print(company_element.text.strip())
     print(location_element.text.strip())
-    link_url = job_element.find_all("a")[1]["href"]
+    link_url = job_card.find_all("a")[1]["href"]
     print(f"Apply here: {link_url}\n")
-    print()