From f6ac77c18d2e9997fe4cfce079d6bdf8f466ba92 Mon Sep 17 00:00:00 2001
From: YoungJun Seo
Date: Mon, 3 Apr 2017 20:49:39 +0900
Subject: [PATCH] Add exchange rate data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../stock_daily_learning.py     | 174 ++++++++++--------
 .../store_stock_daily_series.py | 133 ++++++++++---
 2 files changed, 206 insertions(+), 101 deletions(-)

diff --git a/learning_modules/stock_daily_learning/stock_daily_learning.py b/learning_modules/stock_daily_learning/stock_daily_learning.py
index 200a569..be10653 100644
--- a/learning_modules/stock_daily_learning/stock_daily_learning.py
+++ b/learning_modules/stock_daily_learning/stock_daily_learning.py
@@ -4,36 +4,33 @@ import numpy as np
 
 # configuration
-# O * W + b -> 3 labels for each time series, O[? 7], W[7 3], B[3]
-#       ^ (O: output 7 vec from 7 vec input)
-#       |
-#      +-+  +-+       +--+
-#      |1|->|2|-> ... |60| time_step_size = 60
-#      +-+  +-+       +--+
-#       ^    ^    ...  ^
-#       |    |         |
-# time series1:[7] [7] ... [7]
-# time series2:[7] [7] ... [7]
-# time series3:[7] [7] ... [7]
-# ...
-# time series(250) or time series(750) (batch_size 250 or test_size 1000 - 250)
-# each input size = input_vec_size=lstm_size=7
-
+# O * W + b -> 3 labels for each time series, O[? 11], W[11 3], B[3]
+#       ^ (O: output 11 vec from 11 vec input)
+#       |
+#      +-+   +-+        +--+
+#      |1|-->|2|--> ... |60| time_step_size = 60
+#      +-+   +-+        +--+
+#       ^     ^     ...  ^
+#       |     |          |
+# time series1:[11] [11] ... [11]
+# time series2:[11] [11] ... [11]
+# time series3:[11] [11] ... [11]
+#
+# each input size = input_vec_size = lstm_size = 11
+
 # configuration variables
-input_vec_size = lstm_size = 7
+input_vec_size = lstm_size = 11
 time_step_size = 60
 label_size = 3
 evaluate_size = 3
 lstm_depth = 4
 
-total_size = 60000
-batch_size = 15000
-test_size = total_size - batch_size
+epoch_size = 100
+batch_size = 20000
+train_rate = 25 # 25%
 
-def init_weights(shape):
-    return tf.Variable(tf.random_normal(shape, stddev=0.01))
 
-def model(X, W, B, lstm_size):
+def model(X, W, B, lstm_size) :
     # X, input shape: (batch_size, time_step_size, input_vec_size)
     XT = tf.transpose(X, [1, 0, 2])  # permute time_step_size and batch_size
     # XT shape: (time_step_size, batch_size, input_vec_size)
@@ -54,13 +51,57 @@ def model(X, W, B, lstm_size):
     # Get the last output
     return tf.matmul(outputs[-1], W) + B, cell.state_size # State size to initialize the stat
 
-def read_series_datas(conn, code_dates):
+
+def get_code_dates() :
+    conn = sql.connect("../../databases/finance_learning.db")
+    with conn :
+        cursor = conn.cursor()
+        cursor.execute("SELECT DISTINCT code FROM stock_daily_series")
+        codes = cursor.fetchall()
+
+        cursor = conn.cursor()
+        cursor.execute("SELECT DISTINCT date FROM stock_daily_series ORDER BY date")
+        dates = cursor.fetchall()[:-(time_step_size + evaluate_size)]
+
+    code_dates = list()
+    for date in dates :
+        for code in codes :
+            code_dates.append((code[0], date[0]))
+
+    np.random.seed()
+    np.random.shuffle(code_dates)
+
+    return code_dates
+
+
+def read_series_datas(conn, code_dates) :
     X = list()
     Y = list()
 
-    for code_date in code_dates:
+    for code_date in code_dates :
         cursor = conn.cursor()
-        cursor.execute("SELECT open, high, low, close, volume, hold_foreign, st_purchase_inst FROM stock_daily_series WHERE code = '{0}' AND date >= '{1}' ORDER BY date LIMIT {2}".format(code_date[0], code_date[1], time_step_size + evaluate_size))
+        cursor.execute(
+            "SELECT "
+                "stock_daily_series.open, "
"stock_daily_series.high, " + "stock_daily_series.low, " + "stock_daily_series.close, " + "stock_daily_series.volume, " + "stock_daily_series.hold_foreign, " + "stock_daily_series.st_purchase_inst, " + "exchange_daily_series.open, " + "exchange_daily_series.high, " + "exchange_daily_series.low, " + "exchange_daily_series.close " + "FROM stock_daily_series " + "JOIN exchange_daily_series " + "ON stock_daily_series.date = exchange_daily_series.date " + "WHERE stock_daily_series.code = '{0}' " + "AND stock_daily_series.date >= '{1}' " + "AND exchange_daily_series.code = 'FX@KRW' " + "ORDER BY stock_daily_series.date LIMIT {2}" + .format(code_date[0], code_date[1], time_step_size + evaluate_size) + ) items = cursor.fetchall() X.append(np.array(items[:time_step_size])) @@ -69,62 +110,47 @@ def read_series_datas(conn, code_dates): max = items[-evaluate_size][1] min = items[-evaluate_size][2] - for item in items[-evaluate_size + 1:]: - if max < item[1]: + for item in items[-evaluate_size + 1:] : + if max < item[1] : max = item[1] - if item[2] < min: + if item[2] < min : min = item[2] - if (min - price) / price < -0.02: + if (min - price) / price < -0.02 : Y.append((0., 0., 1.)) - elif (max - price) / price > 0.04: + elif (max - price) / price > 0.04 : Y.append((1., 0., 0.)) - else: + else : Y.append((0., 1., 0.)) arrX = np.array(X) norX = (arrX - np.mean(arrX, axis = 0)) / np.std(arrX, axis = 0) - return norX, np.array(Y) -def read_datas(): - conn = sql.connect("../../databases/finance_learning.db") - with conn: - cursor = conn.cursor() - cursor.execute("SELECT DISTINCT code FROM stock_daily_series") - codes = cursor.fetchall() - - cursor = conn.cursor() - cursor.execute("SELECT DISTINCT date FROM stock_daily_series ORDER BY date") - dates = cursor.fetchall()[:-(time_step_size + evaluate_size)] + return norX, np.array(Y) - cnt = total_size - code_dates = list() - for date in dates: - for code in codes: - code_dates.append((code[0], date[0])) - if --cnt <= 0: - break - if --cnt <= 0: - break - np.random.seed() - np.random.shuffle(code_dates) +def read_datas(code_dates) : + conn = sql.connect("../../databases/finance_learning.db") + with conn : + X, Y = read_series_datas(conn, code_dates) - trX = list() - trY = list() - trX, trY = read_series_datas(conn, code_dates[:batch_size]) - teX, teY = read_series_datas(conn, code_dates[-test_size:]) + data_size = len(X) + train_size = data_size * 25 // 100 + test_size = data_size - train_size - return trX, trY, teX, teY + return X[:train_size], Y[:train_size], X[:-test_size], Y[:-test_size] -trX, trY, teX, teY = read_datas() -X = tf.placeholder(tf.float32, [None, time_step_size, input_vec_size]) -Y = tf.placeholder(tf.float32, [None, label_size]) +X = tf.placeholder(tf.float32, [None, time_step_size, input_vec_size], name="input") +Y = tf.placeholder(tf.float32, [None, label_size], name="output") # get lstm_size and output 3 labels -W = init_weights([lstm_size, label_size]) -B = init_weights([label_size]) +W = tf.Variable(tf.random_normal([lstm_size, label_size], stddev=0.1), name="weights") +B = tf.Variable(tf.random_normal([label_size], stddev=0.1), name="biases") + +W_hist = tf.histogram_summary("weights", W) +B_hist = tf.histogram_summary("biases", B) +Y_hist = tf.histogram_summary("output", Y) py_x, state_size = model(X, W, B, lstm_size) @@ -134,19 +160,15 @@ def read_datas(): predict_op = tf.argmax(py_x, 1) # Launch the graph in a session -with tf.Session() as sess: +with tf.Session() as sess : # you need to initialize all variables - 
-    tf.global_variables_initializer().run()
-
-    for i in range(100):
-        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX)+1, batch_size)):
-            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
-
-        test_indices = np.arange(len(teX))  # Get A Test Batch
-        #np.random.shuffle(test_indices)
-        test_indices = test_indices[0:test_size]
+    sess.run(tf.global_variables_initializer())
 
-        org = teY[test_indices]
-        res = sess.run(predict_op, feed_dict={X: teX[test_indices], Y: teY[test_indices]})
+    code_dates = get_code_dates()
 
-        print(i, np.mean(np.argmax(org, axis=1) == res))
\ No newline at end of file
+    for epoch in range(epoch_size) :
+        for batch in range(len(code_dates) // batch_size) :
+            trX, trY, teX, teY = read_datas(code_dates[batch_size * batch : batch_size * batch + batch_size])
+            sess.run(train_op, feed_dict={X: trX, Y: trY})
+            res = sess.run(predict_op, feed_dict={X: teX, Y: teY})
+            print("epoch: {0}, batch: {1}, accuracy: {2}".format(epoch, batch, np.mean(np.argmax(teY, 1) == res)))
\ No newline at end of file
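
Review note: the W_hist / B_hist / Y_hist summaries added above are created but never merged or
written, so nothing will appear in TensorBoard. A minimal sketch of wiring them up inside the
training loop, assuming the same TF 0.12-era summary API as the tf.histogram_summary calls above
(the "logs" directory name and the step counter are illustrative, not part of the patch):

    merged = tf.merge_all_summaries()                    # collects W_hist, B_hist, Y_hist
    writer = tf.train.SummaryWriter("logs", sess.graph)  # assumed log directory

    # inside the epoch/batch loop, instead of the bare train_op run:
    summary, _ = sess.run([merged, train_op], feed_dict={X: trX, Y: trY})
    writer.add_summary(summary, epoch * (len(code_dates) // batch_size) + batch)
    writer.flush()
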
diff --git a/store_modules/store_stock_daily_series/store_stock_daily_series.py b/store_modules/store_stock_daily_series/store_stock_daily_series.py
index 8857b75..88c32d5 100644
--- a/store_modules/store_stock_daily_series/store_stock_daily_series.py
+++ b/store_modules/store_stock_daily_series/store_stock_daily_series.py
@@ -2,12 +2,37 @@ import sqlite3 as sql
 import win32com.client as com
 
+START_SERIES_DATE = 20040101 # date from which stock broker (trading member) data is first available
+
 # Create tables if they do not exist
 def create_table(conn):
-    conn.execute("CREATE TABLE IF NOT EXISTS stock_daily_series(code TEXT, date DATE, open INTEGER, high INTEGER, low INTEGER, close INTEGER, volume INTEGER, hold_foreign REAL, st_purchase_inst REAL, PRIMARY KEY(code, date))")
+    conn.execute(
+        "CREATE TABLE IF NOT EXISTS stock_daily_series("
+            "code TEXT, "
+            "date DATE, "
+            "open INTEGER, "
+            "high INTEGER, "
+            "low INTEGER, "
+            "close INTEGER, "
+            "volume INTEGER, "
+            "hold_foreign REAL, "
+            "st_purchase_inst REAL, "
+            "PRIMARY KEY(code, date)"
+        ")")
+    conn.execute(
+        "CREATE TABLE IF NOT EXISTS exchange_daily_series("
+            "code TEXT, "
+            "date DATE, "
+            "open REAL, "
+            "high REAL, "
+            "low REAL, "
+            "close REAL, "
+            "PRIMARY KEY(code, date)"
+        ")")
+
 
-# Save data into the table
-def save_data(conn, code, stock_chart):
+# Save data into the stock daily series table
+def save_stock_data(conn, code, stock_chart):
     sql_str = "INSERT INTO stock_daily_series(code, date, open, high, low, close, volume, hold_foreign, st_purchase_inst) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)"
     cursor = conn.cursor()
 
@@ -18,43 +43,81 @@ def save_data(conn, code, stock_chart):
             (
                 code,
                 datetime(dt // 10000, dt // 100 % 100, dt % 100),
-                stock_chart.GetDataValue(1, i), # open price
-                stock_chart.GetDataValue(2, i), # high price
-                stock_chart.GetDataValue(3, i), # low price
-                stock_chart.GetDataValue(4, i), # close price
-                stock_chart.GetDataValue(5, i), # volume
-                float(stock_chart.GetDataValue(6, i)), # shares held by foreigners
-                float(stock_chart.GetDataValue(7, i)) # cumulative institutional net purchases
+                stock_chart.GetDataValue(1, i),         # open price
+                stock_chart.GetDataValue(2, i),         # high price
+                stock_chart.GetDataValue(3, i),         # low price
+                stock_chart.GetDataValue(4, i),         # close price
+                stock_chart.GetDataValue(5, i),         # volume
+                float(stock_chart.GetDataValue(6, i)),  # shares held by foreigners
+                float(stock_chart.GetDataValue(7, i))   # cumulative institutional net purchases
             )
         )
 
+    conn.commit()
 
-# Get the day after the last date stored in the DB, i.e. the date of the data to store next
-def get_possible_store_date(conn, code):
+
+# Save data into the exchange-rate daily series table
+def save_exchange_data(conn, code, exchange_chart):
+    sql_str = "INSERT INTO exchange_daily_series(code, date, open, high, low, close) VALUES(?, ?, ?, ?, ?, ?)"
+    cursor = conn.cursor()
+    possDate = get_possible_exchange_store_date(conn, code)
+
+    for i in range(exchange_chart.GetHeaderValue(3)):
+        dt = exchange_chart.GetDataValue(0, i) # date
+        if dt < possDate:
+            break
+
+        cursor.execute(
+            sql_str,
+            (
+                code,
+                datetime(dt // 10000, dt // 100 % 100, dt % 100),
+                round(exchange_chart.GetDataValue(1, i), 2),    # open price
+                round(exchange_chart.GetDataValue(2, i), 2),    # high price
+                round(exchange_chart.GetDataValue(3, i), 2),    # low price
+                round(exchange_chart.GetDataValue(4, i), 2),    # close price
+            )
+        )
+
+    conn.commit()
+
+
+# Get the day after the last stock data stored in the DB, i.e. the date of the data to store next
+def get_possible_stock_store_date(conn, code):
     cursor = conn.cursor()
     cursor.execute("SELECT date FROM stock_daily_series WHERE code = '{0}' ORDER BY date DESC LIMIT 1".format(code))
     d = cursor.fetchone()
     if d == None:
-        return 20040101 # if there is no data, read back to the date broker data first became available
+        return START_SERIES_DATE # if there is no data, read back to the configured start date
+
+    dt = datetime.strptime(d[0], "%Y-%m-%d %H:%M:%S") + timedelta(days = 1)
+    return dt.year * 10000 + dt.month * 100 + dt.day
+
+
+# Get the day after the last exchange-rate data stored in the DB, i.e. the date of the data to store next
+def get_possible_exchange_store_date(conn, code):
+    cursor = conn.cursor()
+    cursor.execute("SELECT date FROM exchange_daily_series WHERE code = '{0}' ORDER BY date DESC LIMIT 1".format(code))
+    d = cursor.fetchone()
+    if d == None:
+        return START_SERIES_DATE # if there is no data, read back to the configured start date
 
     dt = datetime.strptime(d[0], "%Y-%m-%d %H:%M:%S") + timedelta(days = 1)
     return dt.year * 10000 + dt.month * 100 + dt.day
 
-conn = sql.connect("../../databases/finance_learning.db")
-with conn:
-    create_table(conn)
 
+def get_stock_data(conn):
     stock_chart = com.Dispatch("CpSysDib.StockChart")
-    stock_chart.SetInputValue(1, ord('1')) # request by period
-    stock_chart.SetInputValue(5, (0, 2, 3, 4, 5, 8, 16, 21)) # requested fields (date, open, high, low, close, volume, foreign holdings, institutional net purchases)
-    stock_chart.SetInputValue(6, ord('D')) # daily data
-    stock_chart.SetInputValue(9, ord('1')) # request adjusted prices
+    stock_chart.SetInputValue(1, ord('1'))                    # request by period
+    stock_chart.SetInputValue(5, (0, 2, 3, 4, 5, 8, 16, 21))  # requested fields (date, open, high, low, close, volume, foreign holdings, institutional net purchases)
+    stock_chart.SetInputValue(6, ord('D'))                    # daily data
+    stock_chart.SetInputValue(9, ord('1'))                    # request adjusted prices
 
     code_mgr = com.Dispatch("CpUtil.CpCodeMgr")
-    for code in code_mgr.GetGroupCodeList(180): # KOSPI 200
-        possDate = get_possible_store_date(conn, code)
+    for code in code_mgr.GetGroupCodeList(180):               # KOSPI 200
+        possDate = get_possible_stock_store_date(conn, code)
 
         stock_chart.SetInputValue(0, code)
-        stock_chart.SetInputValue(3, possDate) # end date
+        stock_chart.SetInputValue(3, possDate)                # end date
 
         if stock_chart.BlockRequest() != 0 or stock_chart.GetDibStatus() != 0: # on error
             continue
 
@@ -62,9 +125,29 @@ def get_possible_store_date(conn, code):
         if stock_chart.GetHeaderValue(5) < possDate: # skip if the latest trading day is earlier than the requested date
             continue
 
-        save_data(conn, code, stock_chart)
+        save_stock_data(conn, code, stock_chart)
 
         while stock_chart.Continue:
             if stock_chart.BlockRequest() != 0 or stock_chart.GetDibStatus() != 0: # on error
                 continue
 
-            save_data(conn, code, stock_chart)
\ No newline at end of file
+            save_stock_data(conn, code, stock_chart)
+
+
+def get_exchange_data(conn):
+    code = "FX@KRW"
+
+    exchange_chart = com.Dispatch("DSCBO1.CpSvr8300")
+    exchange_chart.SetInputValue(0, code)      # code
+    exchange_chart.SetInputValue(1, ord('D'))  # daily data
+    exchange_chart.SetInputValue(3, 9999)      # number of records requested
+
+    if exchange_chart.BlockRequest() != 0 or exchange_chart.GetDibStatus() != 0: # on error
+        return
+
+    save_exchange_data(conn, code, exchange_chart)
+
sql.connect("../../databases/finance_learning.db") +with conn: + create_table(conn) + get_exchange_data(conn) + get_stcok_data(conn) \ No newline at end of file