migrate to: python 3.12, uv, psycopg (version 3)
kevindaffaarr committed Nov 27, 2024
1 parent 0435559 commit 4ee19db
Showing 6 changed files with 1,234 additions and 56 deletions.
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
+3.12
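The new .python-version file pins the interpreter that uv selects when creating the virtual environment, matching the requires-python = "==3.12.*" constraint in pyproject.toml below. A hypothetical runtime guard (not part of this commit) that asserts the same expectation:

import sys

# Fail fast if the active interpreter does not match the 3.12 pin.
assert sys.version_info[:2] == (3, 12), f"expected Python 3.12, got {sys.version.split()[0]}"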
2 changes: 1 addition & 1 deletion database.py
@@ -20,7 +20,7 @@
POSTGRES_DB_PASSWORD: str = os.getenv("POSTGRES_DB_PASSWORD","")
POSTGRES_DB_HOST: str = os.getenv("POSTGRES_DB_HOST","")
POSTGRES_DB_NAME: str = os.getenv("POSTGRES_DB_NAME","")
DB_URL: str = f"postgresql://{POSTGRES_DB_USER}:{POSTGRES_DB_PASSWORD}@{POSTGRES_DB_HOST}/{POSTGRES_DB_NAME}"
DB_URL: str = f"postgresql+psycopg://{POSTGRES_DB_USER}:{POSTGRES_DB_PASSWORD}@{POSTGRES_DB_HOST}/{POSTGRES_DB_NAME}"

# Create Engine
engine = create_engine(DB_URL)
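SQLAlchemy picks the DBAPI from the URL scheme: postgresql+psycopg:// resolves to the psycopg (version 3) dialect, while a bare postgresql:// still defaults to psycopg2, so the prefix change is what actually activates the new driver. A minimal sketch of the same wiring, with placeholder defaults rather than the project's real settings:

import os
from sqlalchemy import create_engine, text

# "postgresql+psycopg" selects SQLAlchemy's psycopg (version 3) dialect;
# a bare "postgresql" scheme would still load psycopg2.
db_url = (
    f"postgresql+psycopg://{os.getenv('POSTGRES_DB_USER', 'postgres')}:"
    f"{os.getenv('POSTGRES_DB_PASSWORD', '')}@"
    f"{os.getenv('POSTGRES_DB_HOST', 'localhost')}/"
    f"{os.getenv('POSTGRES_DB_NAME', 'postgres')}"
)
engine = create_engine(db_url)

with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())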
56 changes: 23 additions & 33 deletions pyproject.toml
@@ -1,39 +1,29 @@
-# uv installation: pip install uv
-# create venv: uv venv
-# compile dependencies: uv pip compile pyproject.toml -o requirements.txt
-# compile dev linux dependencies: uv pip compile pyproject.toml -o requirements_linux.txt --extra dev
-# compile dev windows dependencies: uv pip compile pyproject.toml -o requirements_windows.txt --extra dev --extra windows
-# single installation example: uv pip install fastapi
-# main installation: uv pip install -r requirements.txt
-# dev linux installation: uv pip install -r requirements-linux.txt
-# dev windows installation: uv pip install -r requirements_windows.txt
-# main sync: uv pip sync requirements.txt
-# dev linux sync: uv pip sync requirements_linux.txt
-# dev windows sync: uv pip sync requirements_windows.txt

[project]
name = "quantist_api"
version = "1.0.0"
version = "1.1.0"
description = "Backend of Quantist.io"
readme = "README.md"
+requires-python = "==3.12.*"
dependencies = [
"future",
"fastapi",
"orjson",
"uvicorn[standard]",
"gunicorn",
"plotly",
"kaleido",
"sqlalchemy-bigquery",
"google-cloud-bigquery",
"google-cloud-bigquery-storage",
"SQLAlchemy",
"pyarrow",
"scikit-learn",
"pandas[performance]",
"polars",
"jinja2",
"python-dotenv"
"fastapi[standard]==0.115.5",
"future==1.0.0",
"google-cloud-bigquery==3.27.0",
"jinja2==3.1.4",
"kaleido==0.2.1",
"orjson==3.10.12",
"pandas[performance]==2.2.3",
"plotly==5.24.1",
"polars==1.15.0",
"psycopg==3.2.3",
"python-dotenv==1.0.1",
"scikit-learn==1.5.2",
"sqlalchemy==2.0.36",
"sqlalchemy-bigquery==1.12.0",
"uvicorn[standard]==0.32.1",
]

[project.optional-dependencies]
dev = ["ruff", "asyncio","psycopg2", "pytest"]
windows = ["kaleido==0.1.0.post1"]
+dev = [
+"pytest==8.3.3",
+"ruff==0.8.0",
+]
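Besides pinning every dependency, the new list promotes psycopg==3.2.3 to a runtime dependency and drops psycopg2 from the dev extras (along with the Windows-only kaleido pin). psycopg 3 ships as the psycopg package but keeps the familiar DB-API shape, so direct usage looks like this minimal sketch (placeholder DSN, not the project's connection string):

import psycopg  # psycopg 3: installed as "psycopg", successor to psycopg2

# Placeholder DSN; real credentials come from environment variables.
with psycopg.connect("postgresql://user:password@localhost/dbname") as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT version()")
        print(cur.fetchone()[0])
# Unlike psycopg2's, the psycopg 3 connection context manager also closes
# the connection on exit after committing (or rolling back on error).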
31 changes: 15 additions & 16 deletions quantist_library/brokerflow.py
@@ -22,7 +22,6 @@
from .helper import Bin

import polars as pl
-# TODO remove to_pandas(use_pyarrow_extension_array=True) from_pandas after polars fully implemented

pd.options.mode.copy_on_write = True
pd.options.future.infer_string = True # type: ignore
@@ -280,7 +279,7 @@ async def __get_full_broker_transaction(self,
.order_by(db.StockTransaction.date.asc(), db.StockTransaction.broker.asc())

# Main Query Fetching
-raw_data_broker_full = pl.read_database(query=qry_main.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index("date") # type: ignore
+raw_data_broker_full = pl.read_database(query=qry_main.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index("date") # type: ignore

# Data Cleansing: fillna
raw_data_broker_full.fillna(value=0, inplace=True)
@@ -327,7 +326,7 @@ async def __get_stock_price_data(self,
qry_main = qry.filter(db.StockData.date.between(startdate, enddate)).order_by(db.StockData.date.asc())

# Main Query Fetching
-raw_data_main = pl.read_database(query=qry_main.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index("date") # type: ignore
+raw_data_main = pl.read_database(query=qry_main.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index("date") # type: ignore

# Check how many row is returned
if raw_data_main.shape[0] == 0:
@@ -352,7 +351,7 @@ async def __get_stock_price_data(self,
qry_pre = dbs.query(qry_pre).order_by(qry_pre.c.date.asc()) # type: ignore

# Pre-Data Query Fetching
-raw_data_pre = pl.read_database(query=qry_pre.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index("date") # type: ignore
+raw_data_pre = pl.read_database(query=qry_pre.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index("date") # type: ignore

# Concatenate Pre and Main Query
raw_data_full = pd.concat([raw_data_pre,raw_data_main])
@@ -1132,7 +1131,7 @@ async def _get_stockcodes(self,
(db.ListStock.code.not_in(stockcode_excludes_lower))) # type: ignore

# Query Fetching: filtered_stockcodes
-stockcodes = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True)['code'] # type: ignore
+stockcodes = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas().reset_index(drop=True)['code'] # type: ignore
return pd.Series(stockcodes)

# Get Net Val Sum Val Broker Transaction
@@ -1173,7 +1172,7 @@ async def __get_full_broker_transaction(self,
.order_by(db.StockTransaction.code.asc(), db.StockTransaction.date.asc(), db.StockTransaction.broker.asc())

# Main Query Fetching
-raw_data_broker_full = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index(["code","date"]) # type: ignore
+raw_data_broker_full = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index(["code","date"]) # type: ignore

# Data Cleansing: fillna
raw_data_broker_full.fillna(value=0, inplace=True)
@@ -1194,7 +1193,7 @@ async def __get_stock_price_data(self,
qry = dbs.query(db.StockData.code).filter(db.StockData.code.in_(filtered_stockcodes.to_list())).filter(db.StockData.date.between(startdate, enddate)).group_by(db.StockData.code) # type: ignore

# Query Fetching
-raw_data = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True) # type: ignore
+raw_data = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas() # type: ignore

# Check how many row is returned
if raw_data.shape[0] == 0:
@@ -1218,7 +1217,7 @@ async def __get_stock_price_data(self,
.order_by(db.StockData.code.asc(), db.StockData.date.asc())

# Main Query Fetching
-raw_data_full = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index(["code","date"]) # type: ignore
+raw_data_full = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index(["code","date"]) # type: ignore

# End of Method: Return or Assign Attribute
return raw_data_full
@@ -1424,7 +1423,7 @@ async def _get_broker_ncum_corr(self,
lambda group_df: group_df.with_columns(pl.corr(pl.exclude('code','date','close'), pl.col('close'))).head(1)
).drop('close').sort('code')

-corr_ncum_close = corr.to_pandas(use_pyarrow_extension_array=True).set_index('code').rename_axis('broker', axis='columns')
+corr_ncum_close = corr.to_pandas().set_index('code').rename_axis('broker', axis='columns')
return corr_ncum_close

async def _get_bf_parameters(self,
@@ -1459,7 +1458,7 @@ async def _get_bf_parameters(self,
raw_data_broker_sumval = raw_data_broker_sumval.loc[transaction_true.index[transaction_true]]

# Cumulate volume for nvol
-broker_ncum = raw_data_broker_nval.groupby(by='code').cumsum(axis=0)
+broker_ncum = raw_data_broker_nval.astype(float).groupby(by='code').cumsum(axis=0)
# Get each broker's sum of transaction value
broker_sumval = raw_data_broker_sumval.groupby(by='code').sum()
# Get correlation between each broker's cumulated transaction and close price
@@ -1650,7 +1649,7 @@ async def _calc_radar_indicators(self,

# X Axis:
if y_axis_type == dp.ListRadarType.correlation:
-selected_broker_nval_cumsum = selected_broker_nval.groupby(level='code').cumsum()
+selected_broker_nval_cumsum = selected_broker_nval.astype(float).groupby(level='code').cumsum()
radar_indicators[y_axis_type.value] = selected_broker_nval_cumsum.groupby('code').diff().groupby('code')\
.corrwith(raw_data_full['close'].groupby('code').diff(),axis=0) # type: ignore
elif y_axis_type == dp.ListRadarType.changepercentage:
@@ -1915,11 +1914,11 @@ async def _get_mf_top_stockcodes(self,
if accum_or_distri == dp.ScreenerList.most_distributed:
top_stockcodes['mf'] = self.selected_broker_nval.loc[
self.selected_broker_nval.index.get_level_values(1).isin(pd.date_range(start=startdate, end=enddate))
].groupby("code").sum().nsmallest(n=n_stockcodes, columns="broker_nval")['broker_nval']
].groupby("code").sum().astype(float).nsmallest(n=n_stockcodes, columns="broker_nval")['broker_nval']
else:
top_stockcodes['mf'] = self.selected_broker_nval.loc[
self.selected_broker_nval.index.get_level_values(1).isin(pd.date_range(start=startdate, end=enddate))
].groupby("code").sum().nlargest(n=n_stockcodes, columns="broker_nval")['broker_nval']
].groupby("code").sum().astype(float).nlargest(n=n_stockcodes, columns="broker_nval")['broker_nval']

# get selected_broker_nval that has level 0 index (code) in top_stockcodes
self.selected_broker_nval = self.selected_broker_nval[self.selected_broker_nval.index.get_level_values(0).isin(top_stockcodes.index)]
@@ -1942,7 +1941,7 @@ async def _get_mf_top_stockcodes(self,
else:
wvalflow = self.selected_broker_nval.loc[
self.selected_broker_nval.index.get_level_values(1).isin(pd.date_range(start=startdate, end=enddate))
-]['broker_nval'].groupby("code").cumsum()
+]['broker_nval'].astype(float).groupby("code").cumsum()

top_stockcodes['pricecorrel'] = wvalflow.groupby("code").diff().groupby('code').corr( # type: ignore
self.raw_data_full.loc[
@@ -2085,7 +2084,7 @@ async def _get_data_from_stocklist(self, stocklist: list) -> tuple[list, pd.Data
# Get data from stocklist
top_data = self.raw_data_full.loc[self.raw_data_full.index.get_level_values('code').isin(stocklist)]
# Sum broker_nval for each code and get top n_stockcodes
-stocklist = top_data['broker_nval'].groupby(level='code').sum().nlargest(self.n_stockcodes).index.tolist()
+stocklist = top_data['broker_nval'].groupby(level='code').sum().astype(float).nlargest(self.n_stockcodes).index.tolist()

# Get data from stocklist
top_data = self.raw_data_full.loc[self.raw_data_full.index.get_level_values('code').isin(stocklist)]
@@ -2240,7 +2239,7 @@ async def _get_data_from_stocklist(self,n_stockcodes: int) -> tuple[list[str], p
mf = stocklist_selected_broker_nval.groupby(level='code').tail(self.radar_period).groupby(level='code')['broker_nval'].sum()

# Get top n_stockcodes
-stocklist = mf.nlargest(n_stockcodes).index.tolist()
+stocklist = mf.astype(float).nlargest(n_stockcodes).index.tolist()

# Compile top_stockcodes
top_stockcodes:pd.DataFrame
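Two changes repeat throughout this file. First, pl.read_database(...).to_pandas() drops use_pyarrow_extension_array=True, returning NumPy-backed columns and retiring the TODO deleted at the top of the file. Second, .astype(float) now precedes cumsum, nlargest, nsmallest, and the correlation steps: with the psycopg 3 driver, PostgreSQL numeric values can arrive as Python Decimal objects in an object-dtype column (an assumption about this codebase, but the usual reason for this pattern), and those pandas methods reject object dtype. A minimal sketch of the failure and the fix on synthetic data:

from decimal import Decimal

import pandas as pd

# Synthetic stand-in for a broker_nval column whose values are Decimals,
# which pandas stores with object dtype.
df = pd.DataFrame({
    "code": ["AAAA", "AAAA", "BBBB", "BBBB"],
    "broker_nval": [Decimal("10"), Decimal("-4"), Decimal("7"), Decimal("2")],
})

sums = df.groupby("code")["broker_nval"].sum()
# sums.nlargest(1)  # TypeError: Cannot use method 'nlargest' with dtype object
print(sums.astype(float).nlargest(1))  # works after the float cast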
18 changes: 12 additions & 6 deletions quantist_library/holdingcomposition.py
@@ -27,8 +27,8 @@ class HoldingComposition():
"""
def __init__(self,
stockcode: str,
-startdate: datetime.date|None = None,
-enddate: datetime.date|None = datetime.date.today().replace(day=1) - datetime.timedelta(days=1),
+startdate: datetime.date|pd.Timestamp|None = None,
+enddate: datetime.date|pd.Timestamp|None = datetime.date.today().replace(day=1) - datetime.timedelta(days=1),
categorization: dp.HoldingSectorsCat|None = dp.HoldingSectorsCat.default,
) -> None:
"""
@@ -42,6 +42,12 @@ def __init__(self,
self.enddate: datetime.date = enddate
self.categorization: dp.HoldingSectorsCat = categorization

+# Type conversion
+if (type(self.startdate) is pd.Timestamp):
+self.startdate = self.startdate.date()
+if (type(self.enddate) is pd.Timestamp):
+self.enddate = self.enddate.date()
+
# Validation startdate and enddate
if isinstance(self.startdate, datetime.date) and self.startdate < datetime.date(2015, 1, 1):
raise ValueError("Startdate must be >= 2015-01-01")
@@ -91,16 +97,16 @@ async def __get_data_ksei(self, dbs:db.Session = next(db.get_dbs())) -> pd.DataF
if self.stockcode != "composite":
qry = qry.filter(db.KseiKepemilikanEfek.code == self.stockcode)

-data_ksei = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index("date").sort_index() # type: ignore
+data_ksei = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index("date").sort_index() # type: ignore

return data_ksei

async def __get_data_scripless(self, list_date:list, dbs:db.Session = next(db.get_dbs())) -> pd.DataFrame:
# Query table stockdata: tradebleshares divided by listedshares for filtercode each list_date
qry = dbs.query(db.StockData.date, db.StockData.tradebleshares, db.StockData.listedshares).filter(db.StockData.code == self.stockcode).filter(db.StockData.date.in_(list_date)) # type: ignore
-data_scripless = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas(use_pyarrow_extension_array=True).reset_index(drop=True).set_index("date").sort_index() # type: ignore
+data_scripless = pl.read_database(query=qry.statement, connection=dbs.bind).to_pandas().reset_index(drop=True).set_index("date").sort_index() # type: ignore

data_scripless["scripless_ratio"] = data_scripless["tradebleshares"].astype(float) / data_scripless["listedshares"].astype(float)
data_scripless["scripless_ratio"] = data_scripless["tradebleshares"] / data_scripless["listedshares"]
return data_scripless[["scripless_ratio"]]

async def get(self, dbs:db.Session = next(db.get_dbs())) -> HoldingComposition:
@@ -132,7 +138,7 @@ async def get(self, dbs:db.Session = next(db.get_dbs())) -> HoldingComposition:
holding_composition[key] = data_ksei[self.categorization.value[key]].sum(axis=1)

# Calculate the percentage of total each row
-holding_composition = holding_composition.astype(float).div(holding_composition.sum(axis=1), axis=0)
+holding_composition = holding_composition.div(holding_composition.sum(axis=1), axis=0)

# Append data_scripless[scripless_ratio] column to holding_composition
holding_composition = holding_composition.join(data_scripless, how="left")
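The constructor now accepts pd.Timestamp for startdate/enddate and normalizes them before validation. The exact type() checks matter here: pd.Timestamp subclasses datetime.datetime (and therefore datetime.date), so the later isinstance(..., datetime.date) validation would silently accept an unconverted Timestamp. A hypothetical helper (not in the codebase) showing the same normalization:

import datetime

import pandas as pd

def to_date(value: datetime.date | pd.Timestamp | None) -> datetime.date | None:
    """Normalize a pd.Timestamp to a plain datetime.date."""
    # pd.Timestamp subclasses datetime.datetime, so isinstance(value, datetime.date)
    # is True for Timestamps; test the Timestamp case first.
    if isinstance(value, pd.Timestamp):
        return value.date()
    return value

print(to_date(pd.Timestamp("2024-11-27 09:30")))  # 2024-11-27
print(to_date(datetime.date(2024, 11, 27)))       # 2024-11-27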
(Diff for the sixth changed file did not load.)
