Skip to content

Commit

Permalink
Use real User Agent in project requests
Browse files (browse the repository at this point in the history)
  • Loading branch information
rennerocha committed Dec 14, 2022
1 parent 4eb776e commit 3f1beed
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 8 deletions.
3 changes: 3 additions & 0 deletions data_collection/gazette/settings.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,9 @@
"spidermon.contrib.scrapy.pipelines.ItemValidationPipeline": 400,
"gazette.pipelines.SQLDatabasePipeline": 500,
}
USER_AGENT = (
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0"
)

DOWNLOAD_TIMEOUT = 360

Expand Down
4 changes: 0 additions & 4 deletions data_collection/gazette/spiders/rj_nova_iguacu.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,10 +14,6 @@ class RjNovaIguacu(BaseGazetteSpider):
start_date = dt.date(2014, 1, 6)
BASE_URL = "https://www.novaiguacu.rj.gov.br/diario-oficial/"

custom_settings = {
"USER_AGENT": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0",
}

def start_requests(self):
for date in rrule(DAILY, dtstart=self.start_date, until=self.end_date):
yield scrapy.Request(
Expand Down
4 changes: 0 additions & 4 deletions data_collection/gazette/spiders/rn_mossoro.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,10 +14,6 @@ class RnMossoroSpider(BaseGazetteSpider):
allowed_domains = ["jom.prefeiturademossoro.com.br"]
start_date = dt.date(2008, 1, 1)

custom_settings = {
"USER_AGENT": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0",
}

def start_requests(self):
# avoid skipping months if day of start_date is at the end of the month
first_day_of_start_date_month = dt.date(
Expand Down

0 comments on commit 3f1beed

Please sign in to comment.