forked from ecprice/newsdiffs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
82 lines (66 loc) · 2.48 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, DateTime
from datetime import datetime
from sqlalchemy.orm import sessionmaker
import os
# Best-effort bootstrap: try to make ``database_settings.py`` a symlink to the
# development settings. Both paths are relative, so the link is created in the
# current working directory. Any OSError (for example, the link already
# exists) is deliberately ignored; a missing settings module then surfaces as
# an ImportError on the import just below.
try:
    os.symlink('database_settings_dev.py', 'database_settings.py')
except OSError:
    pass

# Must come after the symlink attempt above, which may be what creates it.
import database_settings

# Declarative base class that every ORM model in this module derives from.
Base = declarative_base()

# Engine bound to the location named in the settings module; SQL echo is off.
engine = create_engine(database_settings.location, echo=False)

# Session factory bound to that engine; call ``Session()`` to get a session.
Session = sessionmaker(bind=engine)

# Sentinel timestamp (1 January 1901) used as an initial ``last_check`` value.
ancient = datetime(year=1901, month=1, day=1)
class Article(Base):
    """Row of the ``Articles`` table: a URL plus three timestamps.

    Columns:
      id           -- integer primary key
      url          -- unique string of at most 255 characters
      initial_date -- non-null datetime, set at construction
      last_update  -- non-null datetime, set to "now" at construction
      last_check   -- non-null datetime, set to ``ancient`` at construction

    All "now" values come from ``datetime.now()``, so they are naive
    datetimes; the elapsed-minutes helpers compare against the same clock.
    """

    __tablename__ = 'Articles'

    id = Column(Integer, primary_key=True)
    url = Column(String(255), unique=True)
    initial_date = Column(DateTime, nullable=False)
    last_update = Column(DateTime, nullable=False)
    last_check = Column(DateTime, nullable=False)

    def __init__(self, url, initial_date=None):
        """Create an article for ``url``.

        url          -- value stored in the ``url`` column.
        initial_date -- datetime to store as ``initial_date``; when omitted
                        (None) the current time is used.
        """
        # Read the clock once so that a freshly created article has
        # initial_date == last_update instead of two values a few
        # microseconds apart.
        now = datetime.now()
        self.url = url
        self.initial_date = now if initial_date is None else initial_date
        self.last_update = now
        # Start from the module-level sentinel far in the past.
        self.last_check = ancient

    @staticmethod
    def _whole_minutes_since(moment):
        """Return the whole minutes (rounded down) from ``moment`` to now."""
        delta = datetime.now() - moment
        # timedelta keeps days and seconds separately; fold both into minutes.
        return delta.seconds // 60 + 24 * 60 * delta.days

    def minutes_since_update(self):
        """Return whole minutes elapsed since ``last_update``."""
        return self._whole_minutes_since(self.last_update)

    def minutes_since_check(self):
        """Return whole minutes elapsed since ``last_check``."""
        return self._whole_minutes_since(self.last_check)

    def __repr__(self):
        # Explicit 1-tuple so the value is always formatted as a single item.
        return "<Article('%s')>" % (self.url,)
if __name__ == '__main__':
    # Command-line maintenance entry point. The flags are independent of each
    # other and, when several are given, are handled in this order:
    #   -s              create every table declared on Base
    #   --canonicalize  cut stored URLs at the first '?', deleting rows whose
    #                   canonical URL is already held by another row
    #   -r              add an Article row for each file that
    #                   ``git ls-files`` reports in the ``articles`` directory
    #   --reformat      (only acts together with -r) rewrite each of those
    #                   files stripped, with exactly one trailing newline
    import sys
    import subprocess

    if '-s' in sys.argv:
        Base.metadata.create_all(engine)

    if '--canonicalize' in sys.argv:
        def canonicalize_url(url):
            """Return ``url`` stripped and cut just before its first '?'."""
            return url.strip().partition('?')[0]

        session = Session()
        for article_row in session.query(Article).all():
            newurl = canonicalize_url(article_row.url)
            if newurl == article_row.url:
                # Already canonical. Without this guard the lookup below
                # finds this very row and the row would be deleted.
                continue
            if session.query(Article).filter_by(url=newurl).first() is None:
                article_row.url = newurl
            else:
                # Another row already holds the canonical URL; drop this one.
                session.delete(article_row)
        session.commit()

    if '-r' in sys.argv:
        # reload from git
        session = Session()
        reformat = '--reformat' in sys.argv
        # universal_newlines=True makes check_output return text rather than
        # bytes, so the string concatenations below also work on Python 3.
        file_list = subprocess.check_output(
            ['/usr/bin/git', 'ls-files'],
            cwd='articles',
            universal_newlines=True,
        )
        for fname in file_list.split():
            url = 'http://' + fname
            session.add(Article(url))
            if reformat:
                path = 'articles/' + fname
                # Read fully and close before reopening the same path for
                # writing, so no file handle is left open.
                with open(path) as infile:
                    txt = infile.read()
                with open(path, 'w') as outfile:
                    outfile.write(txt.strip() + '\n')
        session.commit()