From f37c167c89c722a0ebde81a8e876c608cc1a5273 Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sun, 10 Dec 2023 12:39:50 -0500 Subject: [PATCH 1/5] Initial implementation of insiders tabs --- yfinance/base.py | 24 ++++++++ yfinance/scrapers/holders.py | 104 +++++++++++++++++++++++++++++++++++ yfinance/ticker.py | 12 ++++ 3 files changed, 140 insertions(+) diff --git a/yfinance/base.py b/yfinance/base.py index 66cc7aaf9..0e0c2034b 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1747,6 +1747,30 @@ def get_mutualfund_holders(self, proxy=None, as_dict=False): if as_dict: return data.to_dict() return data + + def get_insider_purchases(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_purchases + if data is not None: + if as_dict: + return data.to_dict() + return data + + def get_insider_transactions(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_transactions + if data is not None: + if as_dict: + return data.to_dict() + return data + + def get_insider_roster_holders(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_roster + if data is not None: + if as_dict: + return data.to_dict() + return data def get_info(self, proxy=None) -> dict: self._quote.proxy = proxy or self.proxy diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 1376d41a1..82662a3bc 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -17,6 +17,10 @@ def __init__(self, data: YfData, symbol: str, proxy=None): self._institutional = None self._mutualfund = None + self._insider_transactions = None + self._insider_purchases = None + self._insider_roster = None + @property def major(self) -> pd.DataFrame: if self._major is None: @@ -34,6 +38,24 @@ def mutualfund(self) -> pd.DataFrame: if self._mutualfund is None: self._scrape(self.proxy) return self._mutualfund + + @property + def insider_transactions(self) -> pd.DataFrame: + if self._insider_transactions is None: + self._scrape_insider_transactions(self.proxy) + return self._insider_transactions + + @property + def insider_purchases(self) -> pd.DataFrame: + if self._insider_purchases is None: + self._scrape_insider_transactions(self.proxy) + return self._insider_purchases + + @property + def insider_roster(self) -> pd.DataFrame: + if self._insider_roster is None: + self._scrape_insider_ros(self.proxy) + return self._insider_roster def _scrape(self, proxy): ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" @@ -68,3 +90,85 @@ def _scrape(self, proxy): if '% Out' in self._mutualfund: self._mutualfund['% Out'] = self._mutualfund[ '% Out'].str.replace('%', '').astype(float) / 100 + + def _scrape_insider_transactions(self, proxy): + ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" + try: + resp = self._data.cache_get(ticker_url + '/insider-transactions', proxy=proxy) + insider_transactions = pd.read_html(StringIO(resp.text)) + except Exception: + insider_transactions = [] + + if len(insider_transactions) >= 3: + self._insider_purchases = insider_transactions[0] + self._insider_transactions = insider_transactions[2] + elif len(insider_transactions) >= 2: + self._insider_purchases = insider_transactions[0] + elif len(insider_transactions) >= 1: + self._insider_transactions = insider_transactions[0] + + if self._insider_transactions is not None: + holders = self._insider_transactions + + def split_insider_title(input_string): + import re + parts = input_string.split(' ') + + for i, part in enumerate(parts): + if not re.match(r'^[A-Z]+\.*-*[A-Z]*$', part): + name_part = ' '.join(parts[:i]) + title_part = ' '.join(parts[i:]) + return [name_part.strip(), title_part.strip()] + + return [input_string] + holders.loc[:, ['Insider', 'Position']] = holders['Insider']\ + .apply(split_insider_title).apply(lambda x: pd.Series(x, index=['Insider', 'Position'])) + + holders = holders[['Insider', 'Position'] + holders.columns\ + .difference(['Insider', 'Position']).tolist()] + + holders.fillna('N/A', inplace=True) + self._insider_transactions = holders + + if self._insider_purchases is not None: + holders = self._insider_purchases + + holders.fillna('N/A', inplace=True) + self._insider_purchases = holders + + + def _scrape_insider_ros(self, proxy): + ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" + try: + resp = self._data.cache_get(ticker_url + '/insider-roster', proxy=proxy) + insider_roster = pd.read_html(StringIO(resp.text)) + except Exception: + insider_roster = [] + + if len(insider_roster) >= 1: + self._insider_roster = insider_roster[0] + + if self._insider_roster is not None: + holders = self._insider_roster + + holders = holders[:-1] # Remove the last row + + def split_name_title(input_string): + import re + parts = input_string.split(' ') + + for i, part in enumerate(parts): + if not re.match(r'^[A-Z]+\.*-*[A-Z]*$', part): + name_part = ' '.join(parts[:i]) + title_part = ' '.join(parts[i:]) + return [name_part.strip(), title_part.strip()] + + return [input_string] + holders.loc[:, ['Individual or Entity', 'Position']] = holders['Individual or Entity']\ + .apply(split_name_title).apply(lambda x: pd.Series(x, index=['Individual or Entity', 'Position'])) + + holders = holders[['Individual or Entity', 'Position'] + holders.columns\ + .difference(['Individual or Entity', 'Position']).tolist()] + + self._insider_roster = holders + diff --git a/yfinance/ticker.py b/yfinance/ticker.py index af8dd750c..580481ccd 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -117,6 +117,18 @@ def institutional_holders(self) -> _pd.DataFrame: def mutualfund_holders(self) -> _pd.DataFrame: return self.get_mutualfund_holders() + @property + def insider_purchases(self) -> _pd.DataFrame: + return self.get_insider_purchases() + + @property + def insider_transactions(self) -> _pd.DataFrame: + return self.get_insider_transactions() + + @property + def insider_roster_holders(self) -> _pd.DataFrame: + return self.get_insider_roster_holders() + @property def dividends(self) -> _pd.Series: return self.get_dividends() From a1453770c6862163fd59ecab9a0536637487f44a Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sun, 10 Dec 2023 13:22:25 -0500 Subject: [PATCH 2/5] Added unit tests, all passing --- tests/ticker.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/ticker.py b/tests/ticker.py index 8f41cca65..e1678ab63 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -24,6 +24,9 @@ ("major_holders", pd.DataFrame), ("institutional_holders", pd.DataFrame), ("mutualfund_holders", pd.DataFrame), + ("insider_transactions", pd.DataFrame), + ("insider_purchases", pd.DataFrame), + ("insider_roster_holders", pd.DataFrame), ("splits", pd.Series), ("actions", pd.DataFrame), ("shares", pd.DataFrame), @@ -338,6 +341,30 @@ def test_mutualfund_holders(self): data_cached = self.ticker.mutualfund_holders self.assertIs(data, data_cached, "data not cached") + def test_insider_transactions(self): + data = self.ticker.insider_transactions + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_transactions + self.assertIs(data, data_cached, "data not cached") + + def test_insider_purchases(self): + data = self.ticker.insider_purchases + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_purchases + self.assertIs(data, data_cached, "data not cached") + + def test_insider_roster_holders(self): + data = self.ticker.insider_roster_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_roster_holders + self.assertIs(data, data_cached, "data not cached") + class TestTickerMiscFinancials(unittest.TestCase): session = None From 6a21ff2bf7d53ddabf766841ef5c0637cbb6966b Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sun, 10 Dec 2023 18:03:11 -0500 Subject: [PATCH 3/5] Added some date, shares formatting --- yfinance/scrapers/holders.py | 55 +++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 82662a3bc..5d806cccb 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -93,12 +93,18 @@ def _scrape(self, proxy): def _scrape_insider_transactions(self, proxy): ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" + resp = self._data.cache_get(ticker_url + '/insider-transactions', proxy=proxy) + + if "Will be right back" in resp.text: + raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" + "Our engineers are working quickly to resolve " + "the issue. Thank you for your patience.") + try: - resp = self._data.cache_get(ticker_url + '/insider-transactions', proxy=proxy) insider_transactions = pd.read_html(StringIO(resp.text)) - except Exception: + except ValueError: insider_transactions = [] - + if len(insider_transactions) >= 3: self._insider_purchases = insider_transactions[0] self._insider_transactions = insider_transactions[2] @@ -110,6 +116,7 @@ def _scrape_insider_transactions(self, proxy): if self._insider_transactions is not None: holders = self._insider_transactions + # add positions column def split_insider_title(input_string): import re parts = input_string.split(' ') @@ -127,22 +134,52 @@ def split_insider_title(input_string): holders = holders[['Insider', 'Position'] + holders.columns\ .difference(['Insider', 'Position']).tolist()] - holders.fillna('N/A', inplace=True) + # add N/A for no information + holders.fillna('N/A', inplace=True) + holders = holders.reset_index(drop=True) + + if 'Date' in holders: + holders['Date'] = pd.to_datetime(holders['Date']) + + if 'Shares' in holders: + holders['Shares'] = holders['Shares'].astype(int) + self._insider_transactions = holders if self._insider_purchases is not None: holders = self._insider_purchases holders.fillna('N/A', inplace=True) + holders = holders.reset_index(drop=True) + + if 'Shares' in holders: + def convert_shares(value): + import re + if re.match(r'^\d+(\.?\d*)?[BbMmKk%]$', value): + return value # Leave values like '40.9B', '7.30%', etc. unchanged + + elif pd.notna(pd.to_numeric(value, errors='coerce')): + return int(value) # Convert to integer if possible + + else: + return value + + holders['Shares'] = holders['Shares'].apply(convert_shares) self._insider_purchases = holders def _scrape_insider_ros(self, proxy): ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" - try: - resp = self._data.cache_get(ticker_url + '/insider-roster', proxy=proxy) + resp = self._data.cache_get(ticker_url + '/insider-roster', proxy=proxy) + + if "Will be right back" in resp.text: + raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" + "Our engineers are working quickly to resolve " + "the issue. Thank you for your patience.") + + try: insider_roster = pd.read_html(StringIO(resp.text)) - except Exception: + except ValueError: insider_roster = [] if len(insider_roster) >= 1: @@ -170,5 +207,9 @@ def split_name_title(input_string): holders = holders[['Individual or Entity', 'Position'] + holders.columns\ .difference(['Individual or Entity', 'Position']).tolist()] + # add N/A for no information + holders.fillna('N/A', inplace=True) + holders = holders.reset_index(drop=True) + self._insider_roster = holders From 245a43070ad153d300c3df9f57eee5fac8333fcd Mon Sep 17 00:00:00 2001 From: Unit Date: Mon, 11 Dec 2023 12:45:04 +0100 Subject: [PATCH 4/5] update holders fetching change from parsing html page to fetching api added parsing old code is commented out --- yfinance/scrapers/holders.py | 153 +++++++++++++++++++++++++++++++++-- 1 file changed, 146 insertions(+), 7 deletions(-) diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 5d806cccb..76e5af1c8 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -3,6 +3,10 @@ import pandas as pd from yfinance.data import YfData +from yfinance.const import _BASE_URL_ +from yfinance.exceptions import YFNotImplementedError, YFinanceDataException, YFinanceException + +_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/" class Holders: @@ -14,6 +18,7 @@ def __init__(self, data: YfData, symbol: str, proxy=None): self.proxy = proxy self._major = None + self._major_direct_holders = None self._institutional = None self._mutualfund = None @@ -24,39 +29,173 @@ def __init__(self, data: YfData, symbol: str, proxy=None): @property def major(self) -> pd.DataFrame: if self._major is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._major @property def institutional(self) -> pd.DataFrame: if self._institutional is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._institutional @property def mutualfund(self) -> pd.DataFrame: if self._mutualfund is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._mutualfund @property def insider_transactions(self) -> pd.DataFrame: if self._insider_transactions is None: - self._scrape_insider_transactions(self.proxy) + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() return self._insider_transactions @property def insider_purchases(self) -> pd.DataFrame: if self._insider_purchases is None: - self._scrape_insider_transactions(self.proxy) + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() return self._insider_purchases @property def insider_roster(self) -> pd.DataFrame: if self._insider_roster is None: - self._scrape_insider_ros(self.proxy) + # self._scrape_insider_ros(self.proxy) + self._fetch_and_parse() return self._insider_roster + def _fetch(self, proxy): + modules = ','.join(["institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"]) + params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol, "formatted": "false"} + result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_, user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) + return result + + def _fetch_and_parse(self): + result = self._fetch(self.proxy) + try: + data = result["quoteSummary"]["result"][0] + # parse "institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity" + self._parse_institution_ownership(data["institutionOwnership"]) + self._parse_fund_ownership(data["fundOwnership"]) + # self._parse_major_direct_holders(data["majorDirectHolders"]) # need more data to investigate + self._parse_major_holders_breakdown(data["majorHoldersBreakdown"]) + self._parse_insider_transactions(data["insiderTransactions"]) + self._parse_insider_holders(data["insiderHolders"]) + self._parse_net_share_purchase_activity(data["netSharePurchaseActivity"]) + except (KeyError, IndexError): + raise YFinanceDataException("Failed to parse holders json data.") + + @staticmethod + def _parse_raw_values(data): + if isinstance(data, dict) and "raw" in data: + return data["raw"] + return data + + def _parse_institution_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) # "pctHeld": "% Out" + self._institutional = df + + def _parse_fund_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) + self._mutualfund = df + + def _parse_major_direct_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "positionDirect": "Shares", "valueDirect": "Value"}, inplace=True) + self._major_direct_holders = df + + def _parse_major_holders_breakdown(self, data): + if "maxAge" in data: + del data["maxAge"] + df = pd.DataFrame.from_dict(data, orient="index") + if not df.empty: + df.columns.name = "Breakdown" + df.rename(columns={df.columns[0]: 'Value'}, inplace=True) + self._major = df + + def _parse_insider_transactions(self, data): + holders = data["transactions"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["startDate"] = pd.to_datetime(df["startDate"], unit="s") + df.rename(columns={ + "startDate": "Start Date", + "filerName": "Insider", + "filerRelation": "Position", + "filerUrl": "URL", + "moneyText": "Transaction", + "transactionText": "Text", + "shares": "Shares", + "value": "Value", + "ownership": "Ownership" # ownership flag, direct or institutional + }, inplace=True) + self._insider_transactions = df + + def _parse_insider_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s") + df["positionIndirectDate"] = pd.to_datetime(df["positionIndirectDate"], unit="s") + df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s") + df.rename(columns={ + "name": "Name", + "relation": "Position", + "url": "URL", + "transactionDescription": "Most Recent Transaction", + "latestTransDate": "Latest Transaction Date", + "positionDirectDate": "Position Direct Date", + "positionDirect": "Shares Owned Directly", + "positionIndirectDate": "Position Indirect Date", + "positionIndirect": "Shares Owned Indirectly" + }, inplace=True) + self._insider_roster = df + + def _parse_net_share_purchase_activity(self, data): + if "maxAge" in data: + del data["maxAge"] + df = pd.DataFrame.from_dict(data, orient="index") + if not df.empty: + df.columns.name = "Activity" + df.rename(columns={df.columns[0]: 'Value'}, inplace=True) + self._insider_purchases = df + + """ def _scrape(self, proxy): ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" try: @@ -212,4 +351,4 @@ def split_name_title(input_string): holders = holders.reset_index(drop=True) self._insider_roster = holders - + """ From 9f0eb6b0ce4a9faa37a17439850ae91b076c2883 Mon Sep 17 00:00:00 2001 From: Unit Date: Mon, 11 Dec 2023 12:45:04 +0100 Subject: [PATCH 5/5] update holders fetching change from parsing html page to fetching api added parsing old code is commented out --- yfinance/scrapers/holders.py | 155 +++++++++++++++++++++++++++++++++-- 1 file changed, 147 insertions(+), 8 deletions(-) diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 5d806cccb..aad130f7b 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -1,8 +1,12 @@ -from io import StringIO +# from io import StringIO import pandas as pd from yfinance.data import YfData +from yfinance.const import _BASE_URL_ +from yfinance.exceptions import YFinanceDataException + +_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/" class Holders: @@ -14,6 +18,7 @@ def __init__(self, data: YfData, symbol: str, proxy=None): self.proxy = proxy self._major = None + self._major_direct_holders = None self._institutional = None self._mutualfund = None @@ -24,39 +29,173 @@ def __init__(self, data: YfData, symbol: str, proxy=None): @property def major(self) -> pd.DataFrame: if self._major is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._major @property def institutional(self) -> pd.DataFrame: if self._institutional is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._institutional @property def mutualfund(self) -> pd.DataFrame: if self._mutualfund is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._mutualfund @property def insider_transactions(self) -> pd.DataFrame: if self._insider_transactions is None: - self._scrape_insider_transactions(self.proxy) + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() return self._insider_transactions @property def insider_purchases(self) -> pd.DataFrame: if self._insider_purchases is None: - self._scrape_insider_transactions(self.proxy) + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() return self._insider_purchases @property def insider_roster(self) -> pd.DataFrame: if self._insider_roster is None: - self._scrape_insider_ros(self.proxy) + # self._scrape_insider_ros(self.proxy) + self._fetch_and_parse() return self._insider_roster + def _fetch(self, proxy): + modules = ','.join(["institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"]) + params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol, "formatted": "false"} + result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_, user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) + return result + + def _fetch_and_parse(self): + result = self._fetch(self.proxy) + try: + data = result["quoteSummary"]["result"][0] + # parse "institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity" + self._parse_institution_ownership(data["institutionOwnership"]) + self._parse_fund_ownership(data["fundOwnership"]) + # self._parse_major_direct_holders(data["majorDirectHolders"]) # need more data to investigate + self._parse_major_holders_breakdown(data["majorHoldersBreakdown"]) + self._parse_insider_transactions(data["insiderTransactions"]) + self._parse_insider_holders(data["insiderHolders"]) + self._parse_net_share_purchase_activity(data["netSharePurchaseActivity"]) + except (KeyError, IndexError): + raise YFinanceDataException("Failed to parse holders json data.") + + @staticmethod + def _parse_raw_values(data): + if isinstance(data, dict) and "raw" in data: + return data["raw"] + return data + + def _parse_institution_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) # "pctHeld": "% Out" + self._institutional = df + + def _parse_fund_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) + self._mutualfund = df + + def _parse_major_direct_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "positionDirect": "Shares", "valueDirect": "Value"}, inplace=True) + self._major_direct_holders = df + + def _parse_major_holders_breakdown(self, data): + if "maxAge" in data: + del data["maxAge"] + df = pd.DataFrame.from_dict(data, orient="index") + if not df.empty: + df.columns.name = "Breakdown" + df.rename(columns={df.columns[0]: 'Value'}, inplace=True) + self._major = df + + def _parse_insider_transactions(self, data): + holders = data["transactions"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["startDate"] = pd.to_datetime(df["startDate"], unit="s") + df.rename(columns={ + "startDate": "Start Date", + "filerName": "Insider", + "filerRelation": "Position", + "filerUrl": "URL", + "moneyText": "Transaction", + "transactionText": "Text", + "shares": "Shares", + "value": "Value", + "ownership": "Ownership" # ownership flag, direct or institutional + }, inplace=True) + self._insider_transactions = df + + def _parse_insider_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s") + df["positionIndirectDate"] = pd.to_datetime(df["positionIndirectDate"], unit="s") + df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s") + df.rename(columns={ + "name": "Name", + "relation": "Position", + "url": "URL", + "transactionDescription": "Most Recent Transaction", + "latestTransDate": "Latest Transaction Date", + "positionDirectDate": "Position Direct Date", + "positionDirect": "Shares Owned Directly", + "positionIndirectDate": "Position Indirect Date", + "positionIndirect": "Shares Owned Indirectly" + }, inplace=True) + self._insider_roster = df + + def _parse_net_share_purchase_activity(self, data): + if "maxAge" in data: + del data["maxAge"] + df = pd.DataFrame.from_dict(data, orient="index") + if not df.empty: + df.columns.name = "Activity" + df.rename(columns={df.columns[0]: 'Value'}, inplace=True) + self._insider_purchases = df + + """ def _scrape(self, proxy): ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" try: @@ -212,4 +351,4 @@ def split_name_title(input_string): holders = holders.reset_index(drop=True) self._insider_roster = holders - + """