From 70f3b475c27c5a5498ac1d978ac11ed6f11ec509 Mon Sep 17 00:00:00 2001 From: alonisser Date: Thu, 8 Jun 2017 12:24:43 +0300 Subject: [PATCH 1/9] Allow presence to run alone, logs --- simple/management/commands/syncdata.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/simple/management/commands/syncdata.py b/simple/management/commands/syncdata.py index 7279a7fd..9970f35c 100644 --- a/simple/management/commands/syncdata.py +++ b/simple/management/commands/syncdata.py @@ -66,7 +66,7 @@ class Command(NoArgsDbLogCommand): make_option('--laws', action='store_true', dest='laws', help="download and parse laws"), - make_option('--presence', action='store_true', dest='presence', + make_option(' ', action='store_true', dest='presence', help="download and parse presence"), make_option('--update', action='store_true', dest='update', help="online update of data."), @@ -97,7 +97,7 @@ def _handle_noargs(self, **options): if all_options: process = True - selected_options = [all_options, process, update, laws] + selected_options = [all_options, process, update, laws, presence] if not any(selected_options): logger.error( "no arguments found. doing nothing. 
\ntry -h for help.\n--all to run the full syncdata flow.\n--update for an online dynamic update.") @@ -437,7 +437,7 @@ def get_approved_bill_text_for_vote(self, vote): logger.exception(u'Exception with approved bill text for vote %s title=%s' % (vote.id, vote.title)) def update_presence(self): - logger.debug("update presence") + logger.info("Starting to update presence") try: (presence, valid_weeks) = parse_presence.parse_presence(filename=os.path.join(DATA_ROOT, 'presence.txt.gz')) @@ -451,6 +451,7 @@ def update_presence(self): c = None for member in Member.current_members.all(): + logger.info("Trying to update presence for %s" % member.pk) if member.id not in presence: logger.error('member %s (id=%d) not found in presence data', member.name, member.id) continue @@ -477,6 +478,8 @@ def update_presence(self): else: date = iso_to_gregorian(*current_timestamp, iso_day=0) current_timestamp = (date + datetime.timedelta(8)).isocalendar()[:2] + logger.info('Finished updating presence') + def update_private_proposal_content_html(self, pp): html = parse_remote.rtf(pp.source_url) From 8fed3e3f595e3579f51dc8837c2d8acfbe5c9e66 Mon Sep 17 00:00:00 2001 From: alonisser Date: Thu, 8 Jun 2017 12:30:31 +0300 Subject: [PATCH 2/9] git ignore on data packages --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d6f4e286..0b362054 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ npm-debug.log *.egg-info presence/presence.txt presence/presence_log.txt +data/datapackage* From eb1cbc1fc4f1b1751d8417fca00520acddd5fcfa Mon Sep 17 00:00:00 2001 From: alonisser Date: Thu, 6 Jul 2017 21:39:30 +0300 Subject: [PATCH 3/9] Adapting crontab to file on server --- deploy/crontab.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deploy/crontab.txt b/deploy/crontab.txt index ce7de61f..51a91de3 100644 --- a/deploy/crontab.txt +++ b/deploy/crontab.txt @@ -4,8 +4,11 @@ 45 03 * * * /oknesset_data/oknesset/Open-Knesset/manage.py 
parse_plenum_protocols --download --parse 2>&1 | /usr/bin/logger -t open_knesset 00 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py parse_future_plenum_meetings 2>&1 | /usr/bin/logger -t open_knesset 15 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py syncdata --update 2>&1 | /usr/bin/logger -t open_knesset -34 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py scrape_committee_meetings --from_days=100 2>&1 | /usr/bin/logger -t open_knesset -12 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py scrape_committees 2>&1 | /usr/bin/logger -t open_knesset + +# the committee scrapers are handled as part of download_knesset_datapackage management command + +# 34 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py scrape_committee_meetings --from_days=100 2>&1 | /usr/bin/logger -t open_knesset +# 12 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py scrape_committees 2>&1 | /usr/bin/logger -t open_knesset 59 04 * * * /oknesset_data/oknesset/Open-Knesset/manage.py send_email_to_editors 2>&1 | /usr/bin/logger -t open_knesset 00 05 * * * /oknesset_data/oknesset/Open-Knesset/manage.py notify --daily 2>&1 | /usr/bin/logger -t open_knesset 01 05 * * 5 /oknesset_data/oknesset/Open-Knesset/manage.py notify --weekly 2>&1 | /usr/bin/logger -t open_knesset From 73f3ae922c4ea40bde5bf2a701819d86ab932736 Mon Sep 17 00:00:00 2001 From: alonisser Date: Thu, 6 Jul 2017 21:40:00 +0300 Subject: [PATCH 4/9] Fixing uploading a protocol in meeting view didn't pass mks to parse --- committees/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/committees/views.py b/committees/views.py index 69aff9a5..996b4f5a 100644 --- a/committees/views.py +++ b/committees/views.py @@ -287,9 +287,10 @@ def _handle_add_protocol(self, cm, request): if not cm.protocol_text: # don't override existing protocols cm.protocol_text = request.POST.get('protocol_text') cm.save() - cm.create_protocol_parts() mks, mk_names = get_all_mk_names() 
cm.find_attending_members(mks, mk_names) + cm.create_protocol_parts(mks=mks, mk_names=mk_names) + def _handle_remove_lobbyist(self, cm, request): lobbyist_name = request.POST.get('lobbyist_name') From c5779f8ccf962ce39ec961ca2a4c3817ca08b9a9 Mon Sep 17 00:00:00 2001 From: alonisser Date: Thu, 6 Jul 2017 21:40:40 +0300 Subject: [PATCH 5/9] Removing unused imports. TODO: on problematic code I found but didn't fix --- committees/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/committees/models.py b/committees/models.py index 3facbba8..b88b075c 100644 --- a/committees/models.py +++ b/committees/models.py @@ -21,9 +21,7 @@ from lobbyists.models import LobbyistCorporation from itertools import groupby from hebrew_numbers import gematria_to_int -from mks.utils import get_all_mk_names -from knesset_data.protocols.committee import \ - CommitteeMeetingProtocol as KnessetDataCommitteeMeetingProtocol + from knesset_data_django.committees import members_extended COMMITTEE_PROTOCOL_PAGINATE_BY = 120 @@ -287,6 +285,7 @@ def reparse_protocol(self, redownload=True, mks=None, mk_names=None): reparse_protocol(self, redownload, mks, mk_names) def update_from_dataservice(self, dataservice_object=None): + # TODO: obviously broken, not sure what was here originally and where it moved from committees.management.commands.scrape_committee_meetings import \ Command as ScrapeCommitteeMeetingCommand from knesset_data.dataservice.committees import \ From 0550d83a752596887e27eb920e10aab39328da7a Mon Sep 17 00:00:00 2001 From: alonisser Date: Thu, 6 Jul 2017 21:40:57 +0300 Subject: [PATCH 6/9] Cleaning imports --- simple/management/commands/syncdata.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/simple/management/commands/syncdata.py b/simple/management/commands/syncdata.py index 9970f35c..e0200162 100644 --- a/simple/management/commands/syncdata.py +++ b/simple/management/commands/syncdata.py @@ -15,7 +15,6 @@ from django.conf import 
settings from django.contrib.contenttypes.models import ContentType -from django.db.models import Max from okscraper_django.management.base_commands import NoArgsDbLogCommand from pyth.plugins.rtf15.reader import Rtf15Reader @@ -23,10 +22,10 @@ from committees.models import Committee, CommitteeMeeting from knesset.utils import cannonize from knesset.utils import send_chat_notification -from laws.models import (Vote, VoteAction, Bill, Law, PrivateProposal, +from laws.models import (Vote, Bill, Law, PrivateProposal, KnessetProposal, GovProposal, GovLegislationCommitteeDecision) from links.models import Link -from mks.models import Member, Party, Membership, WeeklyPresence, Knesset +from mks.models import Member, WeeklyPresence, Knesset from persons.models import Person, PersonAlias @@ -37,7 +36,7 @@ from simple.parsers import parse_laws from simple.parsers import parse_remote from simple.parsers.parse_gov_legislation_comm import ParseGLC -from simple.parsers import mk_info_html_parser as mk_parser + from simple.parsers import parse_presence from syncdata_globals import p_explanation, strong_explanation, explanation From 2d88cf1893ae710f1c58ebba60ee0fe78cec345c Mon Sep 17 00:00:00 2001 From: alonisser Date: Fri, 7 Jul 2017 14:42:28 +0300 Subject: [PATCH 7/9] Handling sync token invalidated in google calendars --- events/scrapers.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/events/scrapers.py b/events/scrapers.py index c361b849..def140c1 100644 --- a/events/scrapers.py +++ b/events/scrapers.py @@ -13,6 +13,8 @@ import json import dateutil.parser +GOOGLE_CALENDAR_API_KEY = settings.GOOGLE_CALENDAR_API_KEY + class PersonsEventsScraper(BaseScraper): """ @@ -24,14 +26,14 @@ def __init__(self): self.source = BaseSource() self.storage = BaseStorage() - def _get_google_cal_page(self, calendar_id, sync_token, page_token=None): - api_key = settings.GOOGLE_CALENDAR_API_KEY + def _get_google_cal_page(self, calendar_id, sync_token, 
page_token=None, is_retry=False): + if page_token is not None: param = '&pageToken=%s' % quote(page_token) else: param = '&syncToken=%s' % quote(sync_token) if sync_token is not None else '' calendar_url = 'https://content.googleapis.com/calendar/v3/calendars/%s/events?showDeleted=true&singleEvents=true%s&key=%s' % ( - quote(calendar_id), param, quote(api_key)) + quote(calendar_id), param, quote(GOOGLE_CALENDAR_API_KEY)) try: response = urllib2.urlopen(calendar_url) data = json.load(response) @@ -42,6 +44,11 @@ def _get_google_cal_page(self, calendar_id, sync_token, page_token=None): res['items'] = data['items'] if 'items' in data else [] return res except urllib2.HTTPError as e: + if e.code == 410 and not is_retry: + # Retry without sync token according to docs + self._getLogger().info(u'retrying calendar with sync token invalidated %s' % calendar_id) + return self._get_google_cal_page(calendar_id=calendar_id, sync_token=None, page_token=page_token, + is_retry=True) self._getLogger().exception( u'Exception in trying to fetch google calendar id %s with url %s' % (calendar_id, calendar_url)) return None From e28194997d6c76fe1ed7e219059c3da77bc9541f Mon Sep 17 00:00:00 2001 From: alonisser Date: Sat, 8 Jul 2017 14:45:47 +0300 Subject: [PATCH 8/9] fix a syntax Warning (yak global usage) --- auxiliary/tag_suggestions/__init__.py | 56 +++++++++++++-------------- ok_tag/tag_suggestions.py | 6 +-- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/auxiliary/tag_suggestions/__init__.py b/auxiliary/tag_suggestions/__init__.py index 7d420720..76da76b5 100644 --- a/auxiliary/tag_suggestions/__init__.py +++ b/auxiliary/tag_suggestions/__init__.py @@ -7,6 +7,7 @@ import operator + def approve(admin, request, tag_suggestions): for tag_suggestion in tag_suggestions: obj = tag_suggestion.object @@ -23,7 +24,7 @@ def approve(admin, request, tag_suggestions): def sum_add_two_dictionaries(dict, dict_to_add): """Takes two dictionaries, assuming their values are numeric, 
and sum each item that exist in both, writing the merged dictionary to the first dictionary.""" - #go over the dictionary to add + # go over the dictionary to add for key in dict_to_add: if key in dict: dict[key] += dict_to_add[key] @@ -31,50 +32,49 @@ def sum_add_two_dictionaries(dict, dict_to_add): dict[key] = dict_to_add[key] - -#A list of prefix charcters to use in tag extraction +# A list of prefix charcters to use in tag extraction prefixes = [u'ב', u'ו', u'ה', u'מ', u'מה', u'ל', u''] _all_tags_names = [] + def all_tags_names(): '''Lazy intialization of tags list''' - + global _all_tags_names if (_all_tags_names == []): # Extract only used tags, to avoid irrelevant tags vote_tags = Tag.objects.usage_for_model(Vote) bill_tags = Tag.objects.usage_for_model(Bill) cm_tags = Tag.objects.usage_for_model(CommitteeMeeting) all_tags = list(set(vote_tags).union(bill_tags).union(cm_tags)) - - #A list of tags that have been tagged over 10 times in the website - global _all_tags_names + + # A list of tags that have been tagged over 10 times in the website + _all_tags_names = [tag.name for tag in all_tags] - - return _all_tags_names + return _all_tags_names def get_tags_in_text(text): """Returns a dictionary, the keys are tags found in text, and the values are the number of occurrences in text""" - + result_dict = {} words = text.split() if text is not None else [] - - #look for tag in word + + # look for tag in word for tag in all_tags_names(): - #create tag variations according to prefixes + # create tag variations according to prefixes tag_variations = [(p + tag) for p in prefixes] - #find number of occurences of tags for each word - occurence_count = 0 + # find number of occurences of tags for each word + occurence_count = 0 for word in words: if word in tag_variations: occurence_count += 1 - - #if tag found more than once, add them - if occurence_count > 0 : + + # if tag found more than once, add them + if occurence_count > 0: result_dict[tag] = result_dict.get(tag, 
0) + occurence_count - + return result_dict @@ -83,17 +83,17 @@ def extract_suggested_tags(current_tags, text_list): and the values are the number of occurrences in arguments text. current_tags are removed from final list. The list is sorted from most occuring tags to least occuring tags''' - + tags_occurrences = {} - - #find occurences of tags in text + + # find occurences of tags in text for text_to_extract in text_list: sum_add_two_dictionaries(tags_occurrences, get_tags_in_text(text_to_extract)) - - #remove tags that are already tagged + + # remove tags that are already tagged for tag in current_tags: - if tag.name in tags_occurrences: + if tag.name in tags_occurrences: del tags_occurrences[tag.name] - - #sort suggestions - return sorted(tags_occurrences.iteritems(), key=operator.itemgetter(1),reverse=True) + + # sort suggestions + return sorted(tags_occurrences.iteritems(), key=operator.itemgetter(1), reverse=True) diff --git a/ok_tag/tag_suggestions.py b/ok_tag/tag_suggestions.py index 6bea2c8d..fd4d07e0 100644 --- a/ok_tag/tag_suggestions.py +++ b/ok_tag/tag_suggestions.py @@ -39,8 +39,8 @@ def sum_add_two_dictionaries(dict, dict_to_add): def all_tags_names(): '''Lazy intialization of tags list''' - - if (_all_tags_names == []): + global _all_tags_names + if _all_tags_names == []: # Extract only used tags, to avoid irrelevant tags vote_tags = Tag.objects.usage_for_model(Vote) bill_tags = Tag.objects.usage_for_model(Bill) @@ -48,7 +48,7 @@ def all_tags_names(): all_tags = list(set(vote_tags).union(bill_tags).union(cm_tags)) # A list of tags that have been tagged over 10 times in the website - global _all_tags_names + _all_tags_names = [tag.name for tag in all_tags] return _all_tags_names From 2a0d3867f90de3af9ba1f4e179e97333ce9e5396 Mon Sep 17 00:00:00 2001 From: alonisser Date: Sat, 8 Jul 2017 14:45:56 +0300 Subject: [PATCH 9/9] Missing option name. 
oops --- simple/management/commands/syncdata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simple/management/commands/syncdata.py b/simple/management/commands/syncdata.py index e0200162..87fc47d0 100644 --- a/simple/management/commands/syncdata.py +++ b/simple/management/commands/syncdata.py @@ -65,7 +65,7 @@ class Command(NoArgsDbLogCommand): make_option('--laws', action='store_true', dest='laws', help="download and parse laws"), - make_option(' ', action='store_true', dest='presence', + make_option('--presence', action='store_true', dest='presence', help="download and parse presence"), make_option('--update', action='store_true', dest='update', help="online update of data."),