From f9b6e4d3c7f70390f056d84c54a810732da531b8 Mon Sep 17 00:00:00 2001 From: Aditya <132193415+ascendantaditya@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:02:46 +0530 Subject: [PATCH] commmmmmmmmmmmmmmmmmmmmmmmmmmitted --- basics.ipynb | 0 requirements.txt | 7 +++++ scaper.ipynb | 51 ++++++++++++++++++++++++++++++++++ scraper.py | 11 ++++++++ webscraping.ipynb | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 140 insertions(+) create mode 100644 basics.ipynb create mode 100644 requirements.txt create mode 100644 scaper.ipynb create mode 100644 scraper.py create mode 100644 webscraping.ipynb diff --git a/basics.ipynb b/basics.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..76ff958 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +# Day-2 +Flask +Flask-WTF +Flask-SQLAlchemy +beautifulsoup4 +pytz +python-dotenv \ No newline at end of file diff --git a/scaper.ipynb b/scaper.ipynb new file mode 100644 index 0000000..746be86 --- /dev/null +++ b/scaper.ipynb @@ -0,0 +1,51 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}\n", + "url = \"https://www.imdb.com/chart/top/?ref_=nv_mv_250\"\n", + "response = requests.get(url, headers=headers)\n", + "html_content = response.content\n", + "soup = BeautifulSoup(html_content, \"html.parser\")\n", + "movies = soup.find_all(\"div\", class_=\"ipc-title ipc-title--base ipc-title--title ipc-title-link-no-icon ipc-title--on-textPrimary sc-1e00898e-9 jQixeG cli-title\")\n", + "for x in movies:\n", + " print(x.h3.text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..ba9d5f5 --- /dev/null +++ b/scraper.py @@ -0,0 +1,11 @@ +import requests +from bs4 import BeautifulSoup + +headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'} +url = "https://www.imdb.com/chart/top/?ref_=nv_mv_250" +response = requests.get(url, headers=headers) +html_content = response.content +soup = BeautifulSoup(html_content, "html.parser") +movies = soup.find_all("div", class_="ipc-title ipc-title--base ipc-title--title ipc-title-link-no-icon ipc-title--on-textPrimary sc-1e00898e-9 jQixeG cli-title") +for x in movies: + print(x.h3.text) \ No newline at end of file diff --git a/webscraping.ipynb b/webscraping.ipynb new file mode 100644 index 0000000..4cc56d2 --- /dev/null +++ b/webscraping.ipynb @@ -0,0 +1,71 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import requests as rq" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "True\n", + "200\n", + "b'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n\\n \\n \\n \\n \\n \\n \\n\\n\\n \\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n GitHub - ascendantaditya/pfolio: Thats My Portfolio\\n\\n\\n\\n \\n\\n \\n \\n\\n\\n \\n\\n\\n \\n\\n\\n \\n \\n\\n \\n \\n\\n \\n \\n \\n \\n \\n\\n\\n\\n \\n\\n \\n\\n\\n\\n\\n \\n\\n \\n\\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n\\n\\n \\n\\n\\n\\n \\n\\n\\n \\n \\n \\n \\n\\n \\n\\n \\n\\n \\n\\n \\n\\n\\n\\n \\n \\n\\n\\n \\n\\n \\n\\n \\n\\n \\n \\n \\n\\n\\n\\n\\n\\n \\n\\n \\n\\n \\n
\\n \\n\\n\\n
\\n Skip to content\\n \\n \\n \\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n
\\n\\n\\n\\n\\n \\n\\n \\n\\n \\n\\n\\n\\n
\\n \\n\\n
\\n
\\n \\n \\n \\n\\n \\n\\n \\n\\n
\\n \\n
\\n
\\n\\n\\n
\\n
\\n \\n\\n
\\n \\n\\n\\n\">\\n \\n \\n
\\n \\n \\n\\n
\\n Search or jump to...\\n
\\n \\n\\n
\\n \\n\\n \\n\\n \\n
\\n \\n

Search code, repositories, users, issues, pull requests...

\\n
\\n \\n
\\n
\\n \\n
\\n \\n \\n \\n \\n \\n\\n \\n
\\n
\\n
\\n
\\n \\n
\\n
\\n Clear\\n \\n\\n
\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n \\n
\\n
\\n
\\n\\n \\n
\\n
\\n\\n
\\n
\\n
\\n \\n
\\n \\n\\n \\n
\\n
\\n
\\n

\\n Provide feedback\\n

\\n
\\n
\\n \\n
\\n
\\n
\\n \\n
\\n

We read every piece of feedback, and take your input very seriously.

\\n \\n \\n \\n
\\n
\\n \\n
\\n\\n \\n \\n\\n \\n
\\n
\\n
\\n

\\n Saved searches\\n

\\n

Use saved searches to filter your results more quickly

\\n
\\n
\\n \\n
\\n
\\n
\\n \\n
\\n\\n \\n\\n
\\n
\\n
\\n\\n
\\n
\\n \\n
\\n
\\n
\\n\\n\\n\\n \\n\\n \\n Sign up\\n \\n
\\n
\\n
\\n \\n\\n\\n \\n \\n\\n
\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n\\n\\n \\n\\n\\n\\n \\n
\\n\\n\\n \\n \\n\\n\\n\\n\\n\\n\\n \\n
\\n
\\n \\n \\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n \\n
\\n\\n
\\n\\n
\\n \\n
\\n \\n \\n\\n \\n \\n \\n ascendantaditya\\n \\n /\\n \\n pfolio\\n \\n\\n Public\\n
\\n\\n\\n
\\n\\n
\\n \\n\\n
\\n
\\n\\n
\\n
\\n

\\n Thats My Portfolio\\n

\\n
\\n \\n \\n\\n \\n ascendantaditya.github.io/pfolio/\\n \\n
\\n\\n

License

\\n \\n\\n\\n \\n\\n
\\n
\\n \\n
\\n \\n \\n \\n\\n \\n
\\n
\\n\\n
\\n\\n\\n \\n\\n
\\n\\n \\n\\n\\n\\n\\n
\\n \\n\\n\\n \\n \\n

ascendantaditya/pfolio

\\n
\\n
\\n\\n \\n\\n \\n\\n
\\n
\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n
\\n\\n\\n \\n
\\n
\\n\\n
\\n
\\n
\\n
\\n

About

\\n\\n

\\n Thats My Portfolio\\n

\\n
\\n \\n \\n\\n \\n ascendantaditya.github.io/pfolio/\\n \\n
\\n\\n\\n

Resources

\\n \\n\\n

License

\\n \\n\\n\\n\\n\\n \\n \\n\\n \\n\\n\\n

Stars

\\n \\n\\n

Watchers

\\n \\n\\n

Forks

\\n \\n\\n \\n
\\n\\n
\\n
\\n\\n \\n
\\n
\\n

\\n \\n Releases\\n

\\n\\n
No releases published
\\n\\n
\\n
\\n\\n \\n \\n
\\n
\\n

\\n \\n Packages\\n \\n

\\n\\n\\n
\\n No packages published
\\n
\\n\\n\\n\\n
\\n
\\n\\n \\n \\n\\n \\n \\n \\n
\\n
\\n

Languages

\\n
\\n \\n \\n \\n \\n
\\n\\n\\n
\\n
\\n\\n
\\n
\\n \\n
\\n\\n
\\n\\n\\n
\\n\\n
\\n\\n\\n
\\n
\\n\\n \\n\\n
\\n

Footer

\\n\\n \\n\\n\\n
\\n
\\n \\n \\n \\n\\n\\n \\n © 2024 GitHub, Inc.\\n \\n
\\n\\n \\n
\\n
\\n\\n\\n\\n\\n \\n\\n\\n \\n\\n \\n\\n
\\n
\\n
\\n
\\n\\n \\n\\n\\n\\n\\n\\n \\n\\n
\\n
\\n \\n\\n\\n'\n" + ] + } + ], + "source": [ + "import requests\n", + "\n", + "\n", + "url=\"https://github.com/ascendantaditya/pfolio\"\n", + "response=requests.get(url)\n", + "print(response) \n", + "print(response.ok) #if it is a successful response or not\n", + "print(response.status_code) #gives status code of response. 200 means successful request and 404 means request unsuccessful\n", + "print(response.content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}