forked from myano/jenni
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclanfuq_mysql_to_sqlite.py
executable file
·212 lines (200 loc) · 9.79 KB
/
clanfuq_mysql_to_sqlite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/python
# -*- coding: utf-8 -*-
import MySQLdb as mdb
import sys
import sqlite3 as sql
def copy_message_history(mcon, scon):
#userID, message
print "copying message_history"
scur = scon.cursor()
mcur = mcon.cursor()
scur.execute("DROP TABLE IF EXISTS message_history")
scur.execute("CREATE TABLE `message_history` (`userID` bigint DEFAULT (0) NOT NULL, `message` text NOT NULL, `dateStamp` timestamp NOT NULL, CHECK (`userID` >= 0))")
mcur.execute("select count(*) from message_history")
mrows = mcur.fetchone()
total_rows = mrows[0]
cur_row = 0
num_rows_per_query = 1000
print "found %d rows, processing %d at a time" % (total_rows, num_rows_per_query)
while cur_row < total_rows:
print "processing rows %d to %d" % (cur_row, cur_row+num_rows_per_query)
mcur.execute("select userID, message, dateStamp from message_history limit %s, %s", (cur_row, num_rows_per_query))
mrows = mcur.fetchall()
for mrow in mrows:
scur.execute("insert into message_history (userID, message, dateStamp) values(?,?,?)", (mrow[0], unicode(mrow[1].strip('\r\n'),'latin-1'), mrow[2]))
scon.commit()
cur_row += num_rows_per_query
print "cur_row = %d" % (cur_row)
print "copied message_history"
def copy_words(mcon, scon):
#wordID, word
print "copying words"
scur = scon.cursor()
mcur = mcon.cursor()
scur.execute("DROP TABLE IF EXISTS words")
scur.execute("CREATE TABLE `words` (`wordID` integer NOT NULL PRIMARY KEY AUTOINCREMENT, `word` varchar(255) NOT NULL)")
mcur.execute("select count(*) from words")
mrows = mcur.fetchone()
total_rows = mrows[0]
cur_row = 0
num_rows_per_query = 1000
print "found %d rows, processing %d at a time" % (total_rows, num_rows_per_query)
while cur_row < total_rows:
print "processing rows %d to %d" % (cur_row, cur_row+num_rows_per_query)
mcur.execute("select wordID, word from words limit %s, %s", (cur_row, num_rows_per_query))
mrows = mcur.fetchall()
for mrow in mrows:
scur.execute("insert into words (wordID, word) values(?,?)", (mrow[0], unicode(mrow[1].strip('\r\n'),'latin-1')))
scon.commit()
cur_row += num_rows_per_query
print "cur_row = %d" % (cur_row)
print "copied words"
def copy_users(mcon, scon):
#userID, hostname, handle, dateSeen, admin
print "copying users"
scur = scon.cursor()
mcur = mcon.cursor()
scur.execute("DROP TABLE IF EXISTS users")
scur.execute("CREATE TABLE `users` (`userID` integer NOT NULL PRIMARY KEY AUTOINCREMENT, `hostname` varchar(255) NOT NULL, `handle` varchar(255) NOT NULL, `dateSeen` timestamp DEFAULT (datetime(CURRENT_TIMESTAMP, 'localtime')) NOT NULL, `admin` boolean DEFAULT (0) NOT NULL)")
mcur.execute("select count(*) from users")
mrows = mcur.fetchone()
total_rows = mrows[0]
cur_row = 0
num_rows_per_query = 1000
print "found %d rows, processing %d at a time" % (total_rows, num_rows_per_query)
while cur_row < total_rows:
print "processing rows %d to %d" % (cur_row, cur_row+num_rows_per_query)
mcur.execute("select userID, hostname, handle, dateSeen, admin from users limit %s, %s", (cur_row, num_rows_per_query))
mrows = mcur.fetchall()
for mrow in mrows:
scur.execute("insert into users (userID, hostname, handle, dateSeen, admin) values(?,?,?,?,?)", (mrow[0], mrow[1], mrow[2], mrow[3], mrow[4]))
scon.commit()
cur_row += num_rows_per_query
print "cur_row = %d" % (cur_row)
print "copied users"
def copy_defitions(mcon, scon):
#wordID, userID, definition
print "copying definitions"
scur = scon.cursor()
mcur = mcon.cursor()
scur.execute("DROP TABLE IF EXISTS definitions")
scur.execute("CREATE TABLE `definitions` (`wordID` bigint DEFAULT (0) NOT NULL, `userID` bigint DEFAULT (0) NOT NULL, `definition` text NOT NULL, `dateTime` timestamp DEFAULT (datetime(CURRENT_TIMESTAMP, 'localtime')) NOT NULL, CHECK (`wordID` >= 0), CHECK (`userID` >= 0))")
mcur.execute("select count(*) from definitions")
mrows = mcur.fetchone()
total_rows = mrows[0]
cur_row = 0
num_rows_per_query = 1000
print "found %d rows, processing %d at a time" % (total_rows, num_rows_per_query)
while cur_row < total_rows:
print "processing rows %d to %d" % (cur_row, cur_row+num_rows_per_query)
mcur.execute("select wordID, userID, definition, dateTime from definitions limit %s, %s", (cur_row, num_rows_per_query))
mrows = mcur.fetchall()
for mrow in mrows:
scur.execute("insert into definitions (wordID, userID, definition, dateTime) values(?,?,?,?)", (mrow[0], mrow[1], unicode(mrow[2].strip('\r\n'),'latin-1'), mrow[3]))
#print "definition is %s" % (unicode(mrow[2].strip('\r\n'), 'cp1252'))
scon.commit()
cur_row += num_rows_per_query
print "cur_row = %d" % (cur_row)
print "copied definitions"
def generate_words_definitions(mcon, scon):
print "generating words and definitions"
mcur = mcon.cursor()
scur = scon.cursor()
scur.execute("DROP TABLE IF EXISTS definitions")
scur.execute("CREATE TABLE `definitions` (`wordID` bigint DEFAULT (0) NOT NULL, `userID` bigint DEFAULT (0) NOT NULL, `definition` text NOT NULL, `dateTime` timestamp DEFAULT (datetime(CURRENT_TIMESTAMP, 'localtime')) NOT NULL, CHECK (`wordID` >= 0), CHECK (`userID` >= 0))")
scur.execute("DROP TABLE IF EXISTS words")
scur.execute("CREATE TABLE `words` (`wordID` integer NOT NULL PRIMARY KEY AUTOINCREMENT, `word` varchar(255) NOT NULL)")
num_rows_per_query = 1000
cur_row = 0
mcur.execute("SELECT COUNT(*) FROM `message_history`")
mrows = mcur.fetchone()
total_rows = mrows[0]
print "found %d rows, processing %d at a time" % (total_rows, num_rows_per_query)
inserts = 0
while cur_row < total_rows:
print "processing rows %d to %d " % (cur_row, cur_row+num_rows_per_query)
mcur.execute("select `userID`, `message`, `dateStamp` from `message_history` limit %s,%s", (cur_row, num_rows_per_query))
mrows = mcur.fetchall()
for mrow in mrows:
userID = mrow[0]
message = mrow[1].strip()
dateStamp = mrow[2]
fSpace = message.find(' ')
if fSpace != -1:
fWord = message[0:fSpace]
sSpace = message.find(' ', fSpace+1)
if sSpace != -1:
sWord = message[fSpace+1:sSpace].lower()
if sWord == "is":
word = fWord.lower()
scur.execute("SELECT `wordID` from `words` where `word`=?", (unicode(word.strip('\r\n'),'latin-1'),))
srow = scur.fetchone()
if srow:
wordID = srow[0]
else:
scur.execute("INSERT INTO `words`(`word`) VALUES(?)", (unicode(word.strip('\r\n'),'latin-1'),))
wordID = scur.lastrowid
inserts+=1
scur.execute("INSERT INTO `definitions`(`wordID`,`userID`,`definition`,`dateTime`) VALUES(?,?,?,?)", (wordID,userID,unicode(message.strip('\r\n'),'latin-1'),dateStamp))
inserts+=1
if inserts >= 1000:
scon.commit()
inserts = 0
cur_row += num_rows_per_query
print "cur_row = %d" % cur_row
mcur.execute("select count(*) from definitions")
mrows = mcur.fetchone()
total_rows = mrows[0]
cur_row = 0
inserts = 0
print "found %d rows, processing %d at a time" % (total_rows, num_rows_per_query)
while cur_row < total_rows:
print "processing rows %d to %d " % (cur_row, cur_row+num_rows_per_query)
mcur.execute("select wordID, userID, definition, dateTime from definitions limit %s,%s", (cur_row, num_rows_per_query))
mrows = mcur.fetchall()
for mrow in mrows:
wordID = mrow[0]
userID = mrow[1]
message = mrow[2].strip()
dateTime = mrow[3]
fSpace = message.find(' ')
if fSpace != -1:
fWord = message[0:fSpace]
sSpace = message.find(' ', fSpace+1)
if sSpace != -1:
sWord = message[fSpace+1:sSpace].lower()
if sWord == "is":
word = fWord.lower()
scur.execute("SELECT `wordID` from `words` where `word`=?", (unicode(word.strip('\r\n'),'latin-1'),))
srow = scur.fetchone()
if srow:
wordID = srow[0]
else:
scur.execute("INSERT INTO `words`(`word`) VALUES(?)", (unicode(word.strip('\r\n'),'latin-1'),))
wordID = scur.lastrowid
inserts+=1
scur.execute("INSERT INTO `definitions`(`wordID`,`userID`,`definition`,`dateTime`) VALUES(?,?,?,?)", (wordID,userID,unicode(message.strip('\r\n'),'latin-1'),dateStamp))
inserts+=1
if inserts >= 1000:
scon.commit()
inserts = 0
cur_row += num_rows_per_query
print "cur_row = %d" % cur_row
scon.commit()
try:
mcon = mdb.connect('localhost', 'root', 'thisisit', 'clanfuq')
scon = sql.connect('clanfuq.db')
#copy_message_history(mcon, scon)
#copy_users(mcon, scon)
generate_words_definitions(mcon, scon)
#copy_words(mcon, scon)
#copy_defitions(mcon, scon)
except mdb.Error as e:
print "Error %d: %s" % (e.args[0], e.args[1])
except sql.Error as s:
print "Error %s:" % s.args[0]
finally:
if mcon:
mcon.close()
if scon:
scon.close()