-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_cashflow.py
176 lines (147 loc) · 6.46 KB
/
get_cashflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import time
import pandas as pd
import numpy as np
from plaid import Client
from datetime import datetime
from google.oauth2 import service_account
from googleapiclient.http import MediaFileUpload
from googleapiclient.discovery import build
# Fallback institution used when the search query matches nothing.
DEFAULT_INSTITUTION_ID = 'ins_1'
# Set Plaid API access data and get Client object
# NOTE(review): SECURITY — API credentials are hard-coded here. Even for the
# sandbox environment these should be loaded from environment variables or a
# secrets store, and rotated if this file was ever committed/published.
client = Client(
    client_id='5ecba3951aa68500131ccd7a',
    secret='4b79ae100459ab86833f5aedb9caf7',
    public_key='19586b0d0ac976669c9f0abf56ddf7',
    environment='sandbox'
)
# Google Drive API setup for uploading the generated workbook.
# Full-drive scope; the service-account JSON key is expected one directory up.
GOOGLE_SCOPES = ['https://www.googleapis.com/auth/drive']
GOOGLE_SERVICE_ACCOUNT_FILE = '../service_account.json'
google_credentials = service_account.Credentials.from_service_account_file(
    GOOGLE_SERVICE_ACCOUNT_FILE, scopes=GOOGLE_SCOPES
)
# Module-level Drive v3 client used by google_upload().
service = build('drive', 'v3', credentials=google_credentials)
def google_upload(filepath, name):
    """Upload a local file to Google Drive, converted to a Google Sheet.

    Parameters
    ----------
    filepath : str
        Path of the local file to upload (an .xlsx workbook here).
    name : str
        Display name the file gets in Drive.

    Returns
    -------
    dict
        Drive API response containing the new file's ``id``.
    """
    # Destination folder in Drive; hard-coded for this report pipeline.
    folder_id = '1UscZpDhizXJsJQoHUy6JjRS4o-DhBOtH'
    file_metadata = {
        'name': name,
        # Converting to a native Google Sheet on upload.
        'mimeType': 'application/vnd.google-apps.spreadsheet',
        'parents': [folder_id]
    }
    media = MediaFileUpload(filepath, resumable=True)
    return service.files().create(
        body=file_metadata, media_body=media, fields='id'
    ).execute()
def _find_institution_id(search_query):
    """Return the institution_id whose name exactly equals *search_query*.

    Falls back to ``DEFAULT_INSTITUTION_ID`` (with a console message) when no
    institution name matches exactly.
    """
    response = client.Institutions.search(search_query)
    for institute in response['institutions']:
        if institute['name'] == search_query:
            return institute['institution_id']
    print(
        "Didn't find financial institutes matching this search query. Set INSTITUTION_ID to DEFAULT_INSTITUTION_ID"
    )
    return DEFAULT_INSTITUTION_ID


def _fetch_transactions(institution_id, start_date, end_date):
    """Fetch ALL sandbox transactions for an institution between two dates.

    Creates a sandbox public_token, exchanges it for an access_token, then
    pages through ``Transactions.get`` (the API paginates) until every
    transaction is retrieved. Returns a list of transaction dicts.
    """
    # Generate a public_token for the institution with the 'transactions'
    # product enabled, then exchange it for an access_token.
    create_response = client.Sandbox.public_token.create(institution_id, ['transactions'])
    exchange_response = client.Item.public_token.exchange(create_response['public_token'])
    access_token = exchange_response['access_token']
    # The sandbox needs a moment before transaction data becomes available.
    time.sleep(5)
    response = client.Transactions.get(access_token, start_date=start_date, end_date=end_date)
    transactions = response['transactions']
    time.sleep(5)
    # Responses are paginated: keep requesting with a growing offset until we
    # hold all of them.
    while len(transactions) < response['total_transactions']:
        response = client.Transactions.get(
            access_token,
            start_date=start_date,
            end_date=end_date,
            offset=len(transactions)
        )
        transactions.extend(response['transactions'])
    return transactions


def _with_subtotals(section):
    """Add per-parent-category 'Total' rows and a section-wide 'Total' row.

    *section* is one Income/Expense slice of the pivot table, indexed by
    (Category 1, Category 2). Uses pd.concat because DataFrame.append was
    removed in pandas 2.0.
    """
    parts = [
        pd.concat([group, group.sum().rename(('', 'Total')).to_frame().T])
        for _, group in section.groupby(level=0)
    ]
    parts.append(section.sum().rename(('Total', '')).to_frame().T)
    return pd.concat(parts)


def _build_cashflow_pivot(df_transactions_raw_data):
    """Turn raw Plaid transactions into a month-by-category cash-flow pivot.

    Returns a DataFrame indexed by (Income/Expense, Category 1, Category 2)
    with one column per month plus a 'Grand total', NaNs shown as '-'.
    """
    # Take only the columns we need; .copy() avoids chained-assignment
    # (SettingWithCopyWarning) on the slice when adding columns below.
    df = df_transactions_raw_data[['category', 'date', 'amount']].copy()
    # 'YYYY-MM-DD' -> 'YYYY-MM' bucket for the pivot columns.
    df['month'] = df['date'].apply(lambda s: s[:-3])
    # "Amount" is the settled dollar value. Plaid's sign convention: positive
    # when money moves OUT of the account, negative when it moves IN
    # (purchases positive; deposits/refunds negative). Invert so income is
    # positive and expenses are negative in the report.
    df['Income/Expense'] = df['amount'].apply(lambda a: 'Expense' if a > 0 else 'Income')
    df['amount'] = -df['amount']
    # Split the category list: first entry is the parent category, the second
    # (when present) the child.
    df['Category 1'] = df['category'].apply(lambda c: c[0])
    df['Category 2'] = df['category'].apply(lambda c: c[1] if len(c) > 1 else None)
    df.drop(['category', 'date'], axis=1, inplace=True)
    pivot_table = df.pivot_table(
        values=['amount'],
        index=['Income/Expense', 'Category 1', 'Category 2'],
        columns='month',
        aggfunc='sum',  # string form; aggfunc=np.sum is deprecated in pandas 2.x
        margins=True,
        margins_name='Grand total'
    )
    # Reassemble: Income and Expense sections with subtotals, then the margin.
    pivot_result = pd.concat([
        pd.concat([_with_subtotals(pivot_table.loc['Income'])],
                  keys=['Income'], names=['Income/Expense']),
        pd.concat([_with_subtotals(pivot_table.loc['Expense'])],
                  keys=['Expense'], names=['Income/Expense']),
        pd.concat([pivot_table.loc['Grand total']],
                  keys=['Grand total'], names=['Income/Expense']),
    ])
    # Rename month columns 'YYYY-MM' -> 'Mon YYYY' (e.g. 'Jan 2020').
    pivot_result.rename(
        columns={
            col: datetime.strptime(col, '%Y-%m').strftime('%b %Y')
            for col in pivot_result.columns.levels[1]
            if col != 'Grand total'
        },
        inplace=True
    )
    # Substitute NaN with '-' for readability in the final sheet.
    pivot_result.fillna('-', inplace=True)
    return pivot_result


def main(
    start_date='2020-01-01',
    end_date='2020-05-01',
    search_query="Bank of America",
):
    """Build and upload a cash-flow report from Plaid sandbox transactions.

    Steps: resolve the institution matching *search_query*, download all
    transactions between *start_date* and *end_date* (ISO 'YYYY-MM-DD'),
    pivot them into a monthly Income/Expense table, write both raw data and
    the pivot to an .xlsx workbook, and upload it to Google Drive.

    Returns None.
    """
    institution_id = _find_institution_id(search_query)
    transactions = _fetch_transactions(institution_id, start_date, end_date)
    # Raw transactions go to their own sheet for reference/debugging.
    df_transactions_raw_data = pd.DataFrame(transactions)
    pivot_result = _build_cashflow_pivot(df_transactions_raw_data)
    # Requires openpyxl. The context manager saves and closes the workbook
    # (ExcelWriter.save() was removed in pandas 2.0).
    with pd.ExcelWriter('Zagrebin_Plaid_test.xlsx') as excel_writer:
        df_transactions_raw_data.to_excel(excel_writer, sheet_name='Raw data')
        pivot_result.to_excel(excel_writer, sheet_name='Result sheet')
    google_upload('./Zagrebin_Plaid_test.xlsx', 'Zagrebin_Plaid_test')
    return None
# Script entry point: run the full fetch -> pivot -> upload pipeline
# with the default dates and institution when executed directly.
if __name__ == "__main__":
    main()