Commit f6ac77c
Add exchange rate data
ggomong committed Apr 3, 2017
1 parent e605a96
Showing 2 changed files with 206 additions and 101 deletions.
learning_modules/stock_daily_learning/stock_daily_learning.py (174 changes: 98 additions & 76 deletions)
@@ -4,36 +4,33 @@
import numpy as np

# configuration
-# O * W + b -> 3 labels for each time series, O[? 7], W[7 3], B[3]
-#        ^ (O: output 7 vec from 7 vec input)
-#        |
-#       +-+  +-+        +--+
-#       |1|->|2|-> ...  |60| time_step_size = 60
-#       +-+  +-+        +--+
-#        ^    ^     ...  ^
-#        |    |          |
-# time series1:[7] [7] ... [7]
-# time series2:[7] [7] ... [7]
-# time series3:[7] [7] ... [7]
-# ...
-# time series(250) or time series(750) (batch_size 250 or test_size 1000 - 250)
-# each input size = input_vec_size=lstm_size=7

+# O * W + b -> 3 labels for each time series, O[? 11], W[11 3], B[3]
+#        ^ (O: output 11 vec from 11 vec input)
+#        |
+#       +-+   +-+        +--+
+#       |1|-->|2|--> ... |60| time_step_size = 60
+#       +-+   +-+        +--+
+#        ^     ^      ... ^
+#        |     |          |
+# time series1:[11] [11] ... [11]
+# time series2:[11] [11] ... [11]
+# time series3:[11] [11] ... [11]
+#
+# each input size = input_vec_size = lstm_size = 11

# configuration variables
-input_vec_size = lstm_size = 7
+input_vec_size = lstm_size = 11
time_step_size = 60
label_size = 3
evaluate_size = 3
lstm_depth = 4

-total_size = 60000
-batch_size = 15000
-test_size = total_size - batch_size
+epoch_size = 100
+batch_size = 20000
+train_rate = 25 # 25%

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

-def model(X, W, B, lstm_size):
+def model(X, W, B, lstm_size) :
    # X, input shape: (batch_size, time_step_size, input_vec_size)
    XT = tf.transpose(X, [1, 0, 2])  # permute time_step_size and batch_size
    # XT shape: (time_step_size, batch_size, input_vec_size)
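
The hunk above switches the per-step input from 7 to 11 features: the 7 stock columns plus the 4 exchange-rate columns this commit adds. The collapsed body of model() presumably continues the usual TF 0.x recipe of transpose, reshape, and split so the LSTM sees one (batch_size, input_vec_size) slice per time step. A minimal numpy sketch of that shape pipeline under the configuration above (illustrative only, not the committed code):

import numpy as np

batch_size, time_step_size, input_vec_size = 4, 60, 11

X = np.zeros((batch_size, time_step_size, input_vec_size))
XT = np.transpose(X, (1, 0, 2))             # (time_step_size, batch_size, input_vec_size)
XR = XT.reshape(-1, input_vec_size)         # (time_step_size * batch_size, input_vec_size)
X_split = np.split(XR, time_step_size, 0)   # time_step_size slices of (batch_size, input_vec_size)

assert len(X_split) == time_step_size
assert X_split[0].shape == (batch_size, input_vec_size)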
@@ -54,13 +51,57 @@ def model(X, W, B, lstm_size):
    # Get the last output
    return tf.matmul(outputs[-1], W) + B, cell.state_size  # State size to initialize the state

-def read_series_datas(conn, code_dates):

+def get_code_dates() :
+    conn = sql.connect("../../databases/finance_learning.db")
+    with conn :
+        cursor = conn.cursor()
+        cursor.execute("SELECT DISTINCT code FROM stock_daily_series")
+        codes = cursor.fetchall()
+
+        cursor = conn.cursor()
+        cursor.execute("SELECT DISTINCT date FROM stock_daily_series ORDER BY date")
+        dates = cursor.fetchall()[:-(time_step_size + evaluate_size)]
+
+    code_dates = list()
+    for date in dates :
+        for code in codes :
+            code_dates.append((code[0], date[0]))
+
+    np.random.seed()
+    np.random.shuffle(code_dates)
+
+    return code_dates


+def read_series_datas(conn, code_dates) :
    X = list()
    Y = list()

-    for code_date in code_dates:
+    for code_date in code_dates :
        cursor = conn.cursor()
-        cursor.execute("SELECT open, high, low, close, volume, hold_foreign, st_purchase_inst FROM stock_daily_series WHERE code = '{0}' AND date >= '{1}' ORDER BY date LIMIT {2}".format(code_date[0], code_date[1], time_step_size + evaluate_size))
+        cursor.execute(
+            "SELECT "
+            "stock_daily_series.open, "
+            "stock_daily_series.high, "
+            "stock_daily_series.low, "
+            "stock_daily_series.close, "
+            "stock_daily_series.volume, "
+            "stock_daily_series.hold_foreign, "
+            "stock_daily_series.st_purchase_inst, "
+            "exchange_daily_series.open, "
+            "exchange_daily_series.high, "
+            "exchange_daily_series.low, "
+            "exchange_daily_series.close "
+            "FROM stock_daily_series "
+            "JOIN exchange_daily_series "
+            "ON stock_daily_series.date = exchange_daily_series.date "
+            "WHERE stock_daily_series.code = '{0}' "
+            "AND stock_daily_series.date >= '{1}' "
+            "AND exchange_daily_series.code = 'FX@KRW' "
+            "ORDER BY stock_daily_series.date LIMIT {2}"
+            .format(code_date[0], code_date[1], time_step_size + evaluate_size)
+        )
        items = cursor.fetchall()

        X.append(np.array(items[:time_step_size]))
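
The rewritten query joins stock_daily_series to exchange_daily_series on date, so each fetched row carries the 7 stock columns plus the 4 FX@KRW columns, matching input_vec_size = 11. The committed version interpolates values with str.format; a hedged sketch of the same join using sqlite3 parameter binding instead (the helper name fetch_series is hypothetical, table and column names are taken from the diff):

import sqlite3 as sql

def fetch_series(conn, code, date, limit):
    # Same 11-column join as above, with ? placeholders instead of format().
    cursor = conn.cursor()
    cursor.execute(
        "SELECT s.open, s.high, s.low, s.close, s.volume, "
        "s.hold_foreign, s.st_purchase_inst, "
        "e.open, e.high, e.low, e.close "
        "FROM stock_daily_series AS s "
        "JOIN exchange_daily_series AS e ON s.date = e.date "
        "WHERE s.code = ? AND s.date >= ? AND e.code = 'FX@KRW' "
        "ORDER BY s.date LIMIT ?",
        (code, date, limit))
    return cursor.fetchall()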
@@ -69,62 +110,47 @@ def read_series_datas(conn, code_dates):
        max = items[-evaluate_size][1]
        min = items[-evaluate_size][2]

-        for item in items[-evaluate_size + 1:]:
-            if max < item[1]:
+        for item in items[-evaluate_size + 1:] :
+            if max < item[1] :
                max = item[1]
-            if item[2] < min:
+            if item[2] < min :
                min = item[2]

-        if (min - price) / price < -0.02:
+        if (min - price) / price < -0.02 :
            Y.append((0., 0., 1.))
-        elif (max - price) / price > 0.04:
+        elif (max - price) / price > 0.04 :
            Y.append((1., 0., 0.))
-        else:
+        else :
            Y.append((0., 1., 0.))

    arrX = np.array(X)
    norX = (arrX - np.mean(arrX, axis = 0)) / np.std(arrX, axis = 0)
-    return norX, np.array(Y)
-
-def read_datas():
-    conn = sql.connect("../../databases/finance_learning.db")
-    with conn:
-        cursor = conn.cursor()
-        cursor.execute("SELECT DISTINCT code FROM stock_daily_series")
-        codes = cursor.fetchall()
-
-        cursor = conn.cursor()
-        cursor.execute("SELECT DISTINCT date FROM stock_daily_series ORDER BY date")
-        dates = cursor.fetchall()[:-(time_step_size + evaluate_size)]
+    return norX, np.array(Y)

-        cnt = total_size
-        code_dates = list()
-        for date in dates:
-            for code in codes:
-                code_dates.append((code[0], date[0]))
-                if --cnt <= 0:
-                    break
-            if --cnt <= 0:
-                break
-
-        np.random.seed()
-        np.random.shuffle(code_dates)
+def read_datas(code_dates) :
+    conn = sql.connect("../../databases/finance_learning.db")
+    with conn :
+        X, Y = read_series_datas(conn, code_dates)

-        trX = list()
-        trY = list()
-        trX, trY = read_series_datas(conn, code_dates[:batch_size])
-        teX, teY = read_series_datas(conn, code_dates[-test_size:])
+    data_size = len(X)
+    train_size = data_size * 25 // 100
+    test_size = data_size - train_size

-    return trX, trY, teX, teY
+    return X[:train_size], Y[:train_size], X[-test_size:], Y[-test_size:]

-trX, trY, teX, teY = read_datas()

-X = tf.placeholder(tf.float32, [None, time_step_size, input_vec_size])
-Y = tf.placeholder(tf.float32, [None, label_size])
+X = tf.placeholder(tf.float32, [None, time_step_size, input_vec_size], name="input")
+Y = tf.placeholder(tf.float32, [None, label_size], name="output")

# get lstm_size and output 3 labels
-W = init_weights([lstm_size, label_size])
-B = init_weights([label_size])
+W = tf.Variable(tf.random_normal([lstm_size, label_size], stddev=0.1), name="weights")
+B = tf.Variable(tf.random_normal([label_size], stddev=0.1), name="biases")

+W_hist = tf.histogram_summary("weights", W)
+B_hist = tf.histogram_summary("biases", B)
+Y_hist = tf.histogram_summary("output", Y)

py_x, state_size = model(X, W, B, lstm_size)
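
The labeling block earlier in this hunk turns each window into a one-hot class from the forward evaluate window: a low more than 2% below the reference price yields (0., 0., 1.), a high more than 4% above it yields (1., 0., 0.), and anything else (0., 1., 0.), with the drawdown test taking precedence. A standalone sketch of that rule, assuming price is the close just before the evaluate window (its assignment sits in a collapsed region of the diff):

def label_window(items, evaluate_size=3):
    # items: rows of (open, high, low, close, ...) tuples, oldest first
    price = items[-(evaluate_size + 1)][3]              # assumed reference close
    highs = [row[1] for row in items[-evaluate_size:]]
    lows = [row[2] for row in items[-evaluate_size:]]
    if (min(lows) - price) / price < -0.02:
        return (0., 0., 1.)                             # drops more than 2%
    if (max(highs) - price) / price > 0.04:
        return (1., 0., 0.)                             # gains more than 4%
    return (0., 1., 0.)                                 # neither threshold hit

print(label_window([(100, 102, 99, 100)] * 4))          # (0., 1., 0.)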

@@ -134,19 +160,15 @@ def read_datas():
predict_op = tf.argmax(py_x, 1)

# Launch the graph in a session
-with tf.Session() as sess:
+with tf.Session() as sess :
    # you need to initialize all variables
-    tf.global_variables_initializer().run()

-    for i in range(100):
-        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX)+1, batch_size)):
-            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
-
-        test_indices = np.arange(len(teX))  # Get A Test Batch
-        #np.random.shuffle(test_indices)
-        test_indices = test_indices[0:test_size]
+    sess.run(tf.global_variables_initializer())

-        org = teY[test_indices]
-        res = sess.run(predict_op, feed_dict={X: teX[test_indices], Y: teY[test_indices]})
+    code_dates = get_code_dates()

-        print(i, np.mean(np.argmax(org, axis=1) == res))
+    for epoch in range(epoch_size) :
+        for batch in range(len(code_dates) // batch_size) :
+            trX, trY, teX, teY = read_datas(code_dates[batch_size * batch : batch_size * batch + batch_size])
+            sess.run(train_op, feed_dict={X: trX, Y: trY})
+            res = sess.run(predict_op, feed_dict={X: teX, Y: teY})
+            print("epoch: {0}, batch: {1}, accuracy: {2}".format(epoch, batch, np.mean(np.argmax(teY, 1) == res)))