From ac81d4dfed81302ff6f15185d6412846897a84c2 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Wed, 1 Jan 2025 21:02:57 -0800 Subject: [PATCH 1/2] Refactored retrieval of last API call timestamps to improve performance. - Changed from using an aggregation pipeline on `get_timeseries_db` to a loop fetching data from `get_profile_db`. - New approach iterates over `uuid_list`, fetching profile data and extracting `last_call_ts` for each user. - Simplifies logic, avoids heavy aggregation, and reduces database load. Results in a significant performance improvement. --- pages/home.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pages/home.py b/pages/home.py index 6501f04..ff9521b 100644 --- a/pages/home.py +++ b/pages/home.py @@ -142,14 +142,20 @@ def find_last_get(uuid_list): stage1_timer ) - # Stage 2: Query the timeseries database to find the last GET request + # Stage 2: Fetch profile data and assign last_call with ect.Timer() as stage2_timer: - last_item = list(edb.get_timeseries_db().aggregate([ - {'$match': {'user_id': {'$in': uuid_list}}}, - {'$match': {'metadata.key': 'stats/server_api_time'}}, - {'$match': {'data.name': 'POST_/usercache/get'}}, - {'$group': {'_id': '$user_id', 'write_ts': {'$max': '$metadata.write_ts'}}}, - ])) + last_item = [] + for user_uuid in uuid_list: + user_dict = {'user_id': str(user_uuid)} + profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + print(f'profile_data: {profile_data}') + if profile_data: + # Retrieve and assign last API call timestamp + last_call_ts = profile_data.get('last_call_ts') + if last_call_ts: + user_dict['write_ts'] = arrow.get(last_call_ts).timestamp() + last_item.append(user_dict) + esdsq.store_dashboard_time( "admin/home/find_last_get/query_timeseries_db", stage2_timer @@ -179,10 +185,12 @@ def get_number_of_active_users(uuid_list, threshold): # Stage 2: Calculate the number of active users based on the threshold with ect.Timer() as stage2_timer: number_of_active_users = 0 + current_timestamp = arrow.utcnow().timestamp() for item in last_get_entries: - last_get = item['write_ts'] + last_get = item.get('write_ts') + print(f'last_get: {last_get}') if last_get is not None: - last_call_diff = arrow.get().timestamp() - last_get + last_call_diff = current_timestamp - last_get if last_call_diff <= threshold: number_of_active_users += 1 esdsq.store_dashboard_time( From 9566ffa2973718d902d8d11ec218c05d1b39ac57 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Mon, 6 Jan 2025 14:33:20 -0800 Subject: [PATCH 2/2] Refactored get_number_of_active_users by combining it with find_last_get. Reduced the bloat of operations by leaving only one for loop that does the same thing. --- pages/home.py | 82 +++++++-------------------------------------------- 1 file changed, 11 insertions(+), 71 deletions(-) diff --git a/pages/home.py b/pages/home.py index ff9521b..ba549ff 100644 --- a/pages/home.py +++ b/pages/home.py @@ -131,83 +131,23 @@ def compute_trips_trend(trips_df, date_col): -def find_last_get(uuid_list): - with ect.Timer() as total_timer: - - # Stage 1: Convert UUID strings to UUID objects - with ect.Timer() as stage1_timer: - uuid_list = [UUID(npu) for npu in uuid_list] - esdsq.store_dashboard_time( - "admin/home/find_last_get/convert_to_uuid_objects", - stage1_timer - ) - - # Stage 2: Fetch profile data and assign last_call - with ect.Timer() as stage2_timer: - last_item = [] - for user_uuid in uuid_list: - user_dict = {'user_id': str(user_uuid)} - profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) - print(f'profile_data: {profile_data}') - if profile_data: - # Retrieve and assign last API call timestamp - last_call_ts = profile_data.get('last_call_ts') - if last_call_ts: - user_dict['write_ts'] = arrow.get(last_call_ts).timestamp() - last_item.append(user_dict) - - esdsq.store_dashboard_time( - "admin/home/find_last_get/query_timeseries_db", - stage2_timer - ) - - # Store the total time for the entire function - esdsq.store_dashboard_time( - "admin/home/find_last_get/total_time", - total_timer - ) - - return last_item - - - def get_number_of_active_users(uuid_list, threshold): with ect.Timer() as total_timer: - - # Stage 1: Find the last GET request entries for the given UUIDs - with ect.Timer() as stage1_timer: - last_get_entries = find_last_get(uuid_list) - esdsq.store_dashboard_time( - "admin/home/get_number_of_active_users/find_last_get_entries", - stage1_timer - ) - - # Stage 2: Calculate the number of active users based on the threshold - with ect.Timer() as stage2_timer: - number_of_active_users = 0 - current_timestamp = arrow.utcnow().timestamp() - for item in last_get_entries: - last_get = item.get('write_ts') - print(f'last_get: {last_get}') - if last_get is not None: - last_call_diff = current_timestamp - last_get - if last_call_diff <= threshold: - number_of_active_users += 1 - esdsq.store_dashboard_time( - "admin/home/get_number_of_active_users/calculate_active_users", - stage2_timer - ) - - # Store the total time for the entire function - esdsq.store_dashboard_time( - "admin/home/get_number_of_active_users/total_time", - total_timer - ) - + number_of_active_users = 0 + current_timestamp = arrow.utcnow().timestamp() + for npu in uuid_list: + user_uuid = UUID(npu) + profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + if profile_data: + last_call_ts = profile_data.get('last_call_ts') + if last_call_ts and (current_timestamp - arrow.get(last_call_ts).timestamp()) <= threshold: + number_of_active_users += 1 + esdsq.store_dashboard_time("admin/home/get_number_of_active_users/total_time", total_timer) return number_of_active_users + def generate_card(title_text, body_text, icon): with ect.Timer() as total_timer: