Skip to content

Commit

Permalink
chore: update analytics for july 2023 (clevercanary#892, clevercanary…
Browse files Browse the repository at this point in the history
…#893) (#3621)

* feat: update analytics package with features to support ga4 reports (anvilproject/anvil-portal#2662)

* feat: add more flexibility to api usage in analytics package (anvil#2662)

* feat: allow authenticating multiple service systems in analytics package (anvil#2662)

* fix: take into account dimensions set to None (anvil#2662)

* feat: standardize sort fields as list in analytics package (anvil#2662)

* fix: use `items` instead of `iteritems` (anvil#2662)

* feat: add ga4 support to analytics package (anvilproject/anvil-portal#2662)

* feat: add `pre_plot_df_processor` option for plot over time (anvilproject/anvil-portal#2662)

* fix: usage of `get_metrics_by_dimensions_v4_style` (anvilproject/anvil-portal#2662)

* fix: return service system alone if there's only one (anvilproject/anvil-portal#2662)

* feat: add `port` parameter to analytics authentication (anvilproject/anvil-portal#2662)

* feat: additional features to help with ga4 (anvilproject/anvil-portal#2662)

* feat: allow skipping api call in `get_data_df` (anvilproject/anvil-portal#2662)

* feat: improve table rendering (anvilproject/anvil-portal#2662)

* feat: better width for single columns (anvilproject/anvil-portal#2662)

* feat: support differing parameters for analytics periods (anvilproject/anvil-portal#2806)

* chore: update analytics for july 2023 (clevercanary#892, clevercanary#893)
  • Loading branch information
hunterckx authored Aug 23, 2023
1 parent cc07b4d commit 9c0c06b
Show file tree
Hide file tree
Showing 10 changed files with 759 additions and 907 deletions.
202 changes: 186 additions & 16 deletions analytics/analytics_package/analytics/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from googleapiclient.discovery import build
import os
import pandas as pd
import re


ga_service_params = (
Expand All @@ -23,25 +24,47 @@
lambda service, params: service.reports().query(**params).execute()
)

next_port = 8082
next_port = None
default_service_system = None

def authenticate(secret_name, service_params=ga_service_params):
scopes, service_name, service_version, param_subs, query_func = service_params

def authenticate(secret_name, first_service_params=ga_service_params, *other_service_params, port=None):
service_param_sets = (first_service_params,) + other_service_params

all_scopes = {scope for service_params in service_param_sets for scope in service_params[0]}

ANALYTICS_REPORTING_CLIENT_SECRET_PATH=os.getenv(secret_name)

flow = InstalledAppFlow.from_client_secrets_file(ANALYTICS_REPORTING_CLIENT_SECRET_PATH,
scopes=scopes)

scopes=all_scopes)
global next_port
credentials = flow.run_local_server(port=next_port)
next_port += 1

if port is None:
if next_port is None:
port = 8082
else:
port = next_port
next_port = port + 1
elif next_port is None:
next_port = port + 1

credentials = flow.run_local_server(port=port)

built_systems = [build_service_system(service_params, credentials) for service_params in service_param_sets]

return built_systems if len(built_systems) > 1 else built_systems[0]

def build_service_system(service_params, credentials):
if len(service_params) == 4:
service_name, service_version, param_subs_or_alt_api = service_params[1:]
query_func = None
else:
service_name, service_version, param_subs_or_alt_api, query_func = service_params[1:]

# Build the service object.
service = build(service_name, service_version, credentials=credentials)

service_system = (service, query_func, param_subs, credentials)
service_system = (service, query_func, param_subs_or_alt_api, credentials)

global default_service_system
if default_service_system is None:
Expand All @@ -50,17 +73,26 @@ def authenticate(secret_name, service_params=ga_service_params):
return service_system


def get_metrics_by_dimensions(metrics, dimensions, property, start_date, end_date, filters=None, segment=None, property_prefix='ga:', service_system=None, max_results=1000, sort_results=None, **other_params):

def get_metrics_by_dimensions(metrics, dimensions, service_system=None, sort_results=None, **other_params):
if service_system is None:
service_system = default_service_system

service, query_func, param_subs = service_system[:3]
service, query_func, param_subs_or_alt_api = service_system[:3]

metrics = normalize_id_list(metrics)
dimensions = normalize_id_list(dimensions)
sort_results = normalize_id_list(sort_results)

if isinstance(metrics, list):
metrics = ",".join(metrics)
if isinstance(dimensions, list):
dimensions = ",".join(dimensions)
if query_func is None:
return param_subs_or_alt_api(service, metrics, dimensions, sort_results=sort_results, **other_params)

return get_metrics_by_dimensions_v3_style(service, query_func, param_subs_or_alt_api, metrics, dimensions, sort_results=sort_results, **other_params)


def get_metrics_by_dimensions_v3_style(service, query_func, param_subs, metrics, dimensions, property, start_date, end_date, sort_results, filters=None, segment=None, property_prefix='ga:', max_results=1000, **other_params):
metrics = join_id_list(metrics)
dimensions = join_id_list(dimensions)
sort_results = join_id_list(sort_results)

# Dimensions and Metrics...
# Dimensions are attributes, Metrics are quantitative measurements. e.g. city is a Dimension
Expand Down Expand Up @@ -99,7 +131,145 @@ def get_metrics_by_dimensions(metrics, dimensions, property, start_date, end_dat
df = results_to_df(results)

return df


def get_metrics_by_dimensions_v4_style(service, metrics, dimensions, property, start_date, end_date, sort_results, metric_filter=None, dimension_filter=None, base_metric_filter=None, base_dimension_filter=None, property_prefix="properties/", max_results=1000, **other_params):
    """Run a GA4 Data API (v1beta) report and return all pages as one DataFrame.

    Parameters:
        service: googleapiclient service built for the "analyticsdata" API.
        metrics, dimensions: lists of GA4 metric / dimension API names.
        property: GA4 property id; `property_prefix` is prepended to form the
            "properties/<id>" resource name the API expects.
        start_date, end_date: report date range strings.
        sort_results: list of field names to order by; each becomes a
            dimension orderBy if it appears in `dimensions`, otherwise a
            metric orderBy.
        metric_filter, dimension_filter, base_metric_filter,
        base_dimension_filter: filter expressions (v3-style strings or
            prebuilt dicts); the base_* filter is ANDed with the other one by
            parse_filter_expressions.
        max_results: page size; pages are fetched until the reported total
            row count is exhausted or a page comes back empty.
        other_params: accepted for interface compatibility; unused here.

    Returns:
        pandas.DataFrame with one column per dimension then per metric.
    """
    property = property_prefix + property

    params = {
        "dateRanges": [{"startDate": start_date, "endDate": end_date}],
        "metrics": [{"name": metric} for metric in metrics],
        "dimensions": [{"name": dimension} for dimension in dimensions],
        "metricFilter": parse_filter_expressions([base_metric_filter, metric_filter], True),
        "dimensionFilter": parse_filter_expressions([base_dimension_filter, dimension_filter], False),
        "orderBys": [({"dimension": {"dimensionName": field}} if field in dimensions else {"metric": {"metricName": field}}) for field in sort_results],
        "limit": max_results
    }

    offset = 0
    results = []
    rows_left = None
    has_more = True

    while has_more:
        result = service.properties().runReport(property=property, body=params).execute()
        if rows_left is None:
            # BUG FIX: the RunReportResponse field is "rowCount" (total rows
            # across all pages), not "rowsCount". Reading the wrong key made
            # rows_left start at 0, so pagination always stopped after the
            # first page regardless of the true result size.
            rows_left = result.get("rowCount", 0)
        page_row_count = len(result["rows"]) if "rows" in result else 0
        has_more = page_row_count > 0
        if has_more:
            results.append(result)
            rows_left -= page_row_count
            if rows_left <= 0:
                has_more = False
            else:
                # Advance the page window; the mutated params dict is reused
                # on the next request.
                offset += max_results
                params["offset"] = offset

    df = v4_results_to_df(results, dimensions, metrics)

    return df

def v4_results_to_df(results, dimensions, metrics):
    """Convert a list of GA4 runReport response pages into one DataFrame.

    Parameters:
        results: list of RunReportResponse dicts (possibly empty).
        dimensions, metrics: field-name lists used only to label the columns
            of the empty DataFrame returned when there are no results.

    Returns:
        pandas.DataFrame whose columns are the header names reported by the
        API (dimensions first, then metrics); all cell values are the raw
        strings from the API.
    """
    if not results:
        return pd.DataFrame(columns=dimensions + metrics)

    frames = []
    for result in results:
        # Collect column names: dimension headers first, then metric headers.
        column_names = [header["name"] for header in result.get("dimensionHeaders", [])] + [header["name"] for header in result.get("metricHeaders", [])]

        # Extract row data; a page with no "rows" key yields an empty frame
        # that still carries the column names.
        if "rows" in result:
            data = [[cell["value"] for cell in row.get("dimensionValues", [])] + [cell["value"] for cell in row.get("metricValues", [])] for row in result["rows"]]
        else:
            data = None

        frames.append(pd.DataFrame(data, columns=column_names))

    # Concatenate once rather than per page, which re-copied the accumulated
    # frame on every iteration.
    return pd.concat(frames)

# Service parameter set for the GA4 Data API ("analyticsdata", v1beta).
# Unlike the 5-element v3-style tuple (scopes, name, version, param_subs,
# query_func), this 4-element form carries an alternative API function as its
# last element; build_service_system stores it where param_subs would go and
# leaves query_func as None, and get_metrics_by_dimensions dispatches to it
# directly in that case.
ga4_service_params = (
    ['https://www.googleapis.com/auth/analytics.readonly'],
    'analyticsdata', 'v1beta',
    get_metrics_by_dimensions_v4_style
)

# Matches one v3-style filter term: a field name, a (possibly negated)
# comparison operator, and the value. Regex alternation is ordered, so the
# multi-character operators must be listed before their single-character
# prefixes: with the previous ordering (==|>|<|>=|<=|...) the term "x>=5"
# matched ">" and left "=5" as the value, so >= and <= were never parsed.
filter_match_re = re.compile(r"^(\w+)(?:(==|>=|<=|=@|=~|>|<)|(!=|!@|!~))(.*)$")
# Rewrites "\," and "\;" back to the literal separator characters.
filter_escape_re = re.compile(r"\\([,;])")
# Split a filter string on unescaped ";" (AND) and "," (OR) separators,
# tolerating backslash escapes inside each term.
filter_and_re = re.compile(r"((?:[^\\;]|\\(?:\\\\)*.)+)(?:;|$)")
filter_or_re = re.compile(r"((?:[^\\,]|\\(?:\\\\)*.)+)(?:,|$)")
# Maps v3-style operators to GA4 filter operation / match-type names. Each
# negated operator ("!=", "!@", "!~") maps to the same name as its positive
# form; parse_filter_expression wraps the result in a notExpression instead.
filter_op_names = {
    "==": "EQUAL",
    "!=": "EQUAL",
    ">": "GREATER_THAN",
    "<": "LESS_THAN",
    ">=": "GREATER_THAN_OR_EQUAL",
    "<=": "LESS_THAN_OR_EQUAL",
    "=@": "CONTAINS",
    "!@": "CONTAINS",
    "=~": "PARTIAL_REGEXP",
    "!~": "PARTIAL_REGEXP"
}

def parse_filter_expression(text, is_metric):
    """Parse a v3-style filter string into a GA4 FilterExpression dict.

    The string uses Core Reporting v3 filter syntax: ";" separates AND terms,
    "," separates OR terms within them, and each term is "<field><op><value>"
    as matched by filter_match_re. Non-string input is returned unchanged, so
    callers may pass a prebuilt expression dict (or None) straight through.

    is_metric selects numericFilter (metric) vs stringFilter (dimension)
    output shapes.
    """
    if not isinstance(text, str):
        return text

    def unescape(value):
        # Turn escaped separators ("\," / "\;") back into literal characters.
        return filter_escape_re.sub(r"\1", value)

    def parse_match(text):
        # Split one term into field name, operator (plain or negated) and value.
        field_name, plain_op, inverted_op, value = filter_match_re.match(text).groups()
        op_name = filter_op_names[plain_op or inverted_op]
        if is_metric:
            # NOTE(review): the raw value is passed as int64Value without
            # unescaping; non-integer metric comparisons would need
            # doubleValue — confirm against the GA4 NumericFilter docs.
            plain_expression = {
                "filter": {
                    "fieldName": field_name,
                    "numericFilter": {
                        "operation": op_name,
                        "value": {
                            "int64Value": value
                        }
                    }
                }
            }
        else:
            plain_expression = {
                "filter": {
                    "fieldName": field_name,
                    "stringFilter": {
                        # GA4 stringFilter has no "EQUAL" match type; "=="
                        # maps to "EXACT", other operators map directly.
                        "matchType": "EXACT" if op_name == "EQUAL" else op_name,
                        "value": unescape(value),
                        "caseSensitive": True
                    }
                }
            }
        # Negated operators ("!=", "!@", "!~") wrap the same expression in a
        # notExpression instead of using a distinct operation name.
        return plain_expression if plain_op else {"notExpression": plain_expression}

    def parse_or(text):
        # A single OR term is returned bare; multiple terms become an orGroup.
        or_terms = [parse_match(t) for t in filter_or_re.findall(text)]
        return or_terms[0] if len(or_terms) == 1 else {"orGroup": {"expressions": or_terms}}

    # Top level: AND over the ";"-separated parts, each of which may be an OR.
    and_terms = [parse_or(t) for t in filter_and_re.findall(text)]
    return and_terms[0] if len(and_terms) == 1 else {"andGroup": {"expressions": and_terms}}

def parse_filter_expressions(filters, is_metric):
    """Parse each entry of *filters* and AND the truthy results together.

    Entries may be v3-style filter strings, prebuilt expression dicts, or
    None; falsy parse results are dropped. Returns a single GA4
    FilterExpression dict, or None when nothing remains.
    """
    combined = None
    for expression in filters:
        parsed = parse_filter_expression(expression, is_metric)
        if not parsed:
            continue
        if combined is None:
            combined = parsed
        else:
            combined = {"andGroup": {"expressions": [combined, parsed]}}
    return combined


def normalize_id_list(ids):
    """Coerce *ids* to a list: split comma-separated strings, pass other
    sequences through unchanged, and map falsy values (None, "", []) to []."""
    if not ids:
        return []
    if isinstance(ids, str):
        return ids.split(",")
    return ids

def join_id_list(ids):
    """Join a list of ids into one comma-separated string; an empty list
    yields None (distinct from a list of empty strings, which joins)."""
    if len(ids) == 0:
        return None
    return ",".join(ids)


def build_params(source, subs):
result = {}
Expand Down
Loading

0 comments on commit 9c0c06b

Please sign in to comment.