From 1865f5f02e36dfb28c3be0eee7cc0554a71ac830 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 14 Sep 2022 11:36:36 -0700 Subject: [PATCH 01/45] working Amplitude and Personalize file writers --- generators/datagenerator/output.py | 3 - .../generate_interactions_personalize.py | 452 ++++++++++-------- 2 files changed, 245 insertions(+), 210 deletions(-) diff --git a/generators/datagenerator/output.py b/generators/datagenerator/output.py index fd038e8cf..46aaad821 100644 --- a/generators/datagenerator/output.py +++ b/generators/datagenerator/output.py @@ -5,9 +5,6 @@ from datagenerator.amplitude import AmplitudeIdentifyEvent, AmplitudeTrackEvent, AmplitudeSender from datagenerator.file import FileEvent -# TODO: Add Personalize output file formatter -# TODO: Add Amplitude output formatter - class OutputFormatter: def __init__(self, timestamp, user, platform, properties, name = None): self.event = name diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index 7bd6beb90..636cb30af 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -35,8 +35,12 @@ GENERATED_DATA_ROOT = "src/aws-lambda/personalize-pre-create-resources/data" # Interactions will be generated between these dates -FIRST_TIMESTAMP = 1591803782 # 2020-06-10, 18:43:02 -LAST_TIMESTAMP = 1599579782 # 2020-09-08, 18:43:02 + +# set the end time to the start of the script (since we are generating historical data) +# this is in epoch time in *seconds* +LAST_TIMESTAMP = time.time() +# generate data 90 days prior to the current time +FIRST_TIMESTAMP = LAST_TIMESTAMP - (60*60*24*90) # Users are set up with 3 product categories on their personas. If [0.6, 0.25, 0.15] it means # 60% of the time they'll choose a product from the first category, etc. @@ -73,7 +77,6 @@ # Minimum number of interactions to generate min_interactions = 675000 -# min_interactions = 50000 # Percentages of each event type to generate product_added_percent = .08 @@ -81,6 +84,39 @@ checkout_started_percent = .02 order_completed_percent = .01 +class PersonalizeOutputFileWriter: + def __init__(self, file_name): + f = open(file_name, 'w') + self.csv_file = csv.writer(f) + # Write the header row + self.csv_file.writerow(["ITEM_ID", "USER_ID", "EVENT_TYPE", "TIMESTAMP", "DISCOUNT"]) + + def event(self, event_name, timestamp, user, **properties): + self.csv_file.writerow([properties['product']['id'], + user['id'], + event_name, + timestamp, + properties['discount_context']]) + +class AmplitudeOutputFileWriter: + def __init__(self, file_name): + self.file = open(file_name, 'w') + + def event(self, event_name, timestamp, user, **properties): + user_id = f'{user["id"]:0>5}' + amplitude_event = { + 'event_type': event_name, + 'time': timestamp * 1000, # Amplitude wants time in ms since the epoch + 'user_id': user_id, + 'user_properties': { + 'name': user['name'], + 'age': user['age'], + 'gender': user['gender'], + 'persona': user['persona'] + } + } + self.file.write(f'{amplitude_event}\n') # write this in Amplitude JSON event format for S3 import + def generate_user_items(out_users_filename, out_items_filename, in_users_filenames, in_products_filename): Path(out_items_filename).parents[0].mkdir(parents=True, exist_ok=True) @@ -168,221 +204,223 @@ def generate_interactions(out_interactions_filename, users_df, products_df): print("Writing interactions to: {}".format(out_interactions_filename)) - with open(out_interactions_filename, 'w') as outfile: - f = csv.writer(outfile) - f.writerow(["ITEM_ID", "USER_ID", "EVENT_TYPE", "TIMESTAMP", "DISCOUNT"]) - - category_frequencies = products_df.category.value_counts() - category_frequencies /= sum(category_frequencies.values) - - interaction_product_counts = defaultdict(int) - - # Here we build up a list for each category/gender, of product - # affinities. The product affinity is keyed by one product, - # so we do not end up with exactly PRODUCT_AFFINITY_N sized - # cliques. They overlap a little over multiple users - # - that is why PRODUCT_AFFINITY_N - # can be a little bit lower than a desired clique size. - all_categories = products_df.category.unique() - product_affinities_bycatgender = {} - for category in all_categories: - for gender in ['M', 'F']: - products_cat = products_df.loc[products_df.category==category] - products_cat = products_cat.loc[ - products_cat.gender_affinity.isnull()|(products_cat.gender_affinity==gender)].id.values - # We ensure that all products have PRODUCT_AFFINITY_N products that lead into it - # and PRODUCT_AFFINITY_N products it leads to - affinity_matrix = sum([np.roll(np.identity(len(products_cat)), [0, i], [0, 1]) - for i in range(PRODUCT_AFFINITY_N)]) - np.random.shuffle(affinity_matrix) - affinity_matrix = affinity_matrix.T - np.random.shuffle(affinity_matrix) - affinity_matrix = affinity_matrix.astype(bool) # use as boolean index - affinity_matrix = affinity_matrix | np.identity(len(products_cat), dtype=bool) - - product_infinities = [products_cat[row] for row in affinity_matrix] - product_affinities_bycatgender[(category, gender)] = { - products_cat[i]: products_df.loc[products_df.id.isin(product_infinities[i])] - for i in range(len(products_cat))} - - user_category_to_first_prod = {} - - while interactions < min_interactions: - if (time.time() > next_update_progress): - rate = interactions / (time.time() - start_time_progress) - to_go = (min_interactions - interactions) / rate - print('Generated {} interactions so far (about {} seconds to go)'.format(interactions, int(to_go))) - next_update_progress += PROGRESS_MONITOR_SECONDS_UPDATE - - # Pick a random user - user = users_df.loc[random.randint(0, users_df.shape[0] - 1)] - - # Determine category affinity from user's persona - persona = user['persona'] - # If user persona has sub-categories, we will use those sub-categories to find products for users to partake - # in interactions with. Otehrwise, we will use the high-level categories. - has_subcategories = ':' in user['persona'] - preferred_categories_and_subcats = persona.split('_') - preferred_highlevel_categories = [catstring.split(':')[0] for catstring in preferred_categories_and_subcats] - # preferred_styles = [catstring.split(':')[1] for catstring in preferred_categories_and_subcats] - - p_normalised = (category_affinity_probs * category_frequencies[preferred_highlevel_categories].values) - p_normalised /= p_normalised.sum() - p = NORMALISE_PER_PRODUCT_WEIGHT * p_normalised + (1-NORMALISE_PER_PRODUCT_WEIGHT) * category_affinity_probs - - # Select category based on weighted preference of category order. - chosen_category_ind = np.random.choice(list(range(len(preferred_categories_and_subcats))), 1, p=p)[0] - category = preferred_highlevel_categories[chosen_category_ind] - #category_and_subcat = np.random.choice(preferred_categories_and_subcats, 1, p=p)[0] - - discount_persona = user['discount_persona'] - - gender = user['gender'] - - if has_subcategories: - # if there is a preferred style we choose from those products with this style and category - # but we ignore gender. - # We also do not attempt to keep balance across categories. - style = preferred_categories_and_subcats[chosen_category_ind].split(':')[1] - cachekey = ('category-style', category, style) - prods_subset_df = subsets_cache.get(cachekey) + ############################################################################### + # TODO: ADD A CMD LINE OPTION TO CONTROL THIS + + # Create a file writer + ow = AmplitudeOutputFileWriter(out_interactions_filename) + + category_frequencies = products_df.category.value_counts() + category_frequencies /= sum(category_frequencies.values) + + interaction_product_counts = defaultdict(int) + + # Here we build up a list for each category/gender, of product + # affinities. The product affinity is keyed by one product, + # so we do not end up with exactly PRODUCT_AFFINITY_N sized + # cliques. They overlap a little over multiple users + # - that is why PRODUCT_AFFINITY_N + # can be a little bit lower than a desired clique size. + all_categories = products_df.category.unique() + product_affinities_bycatgender = {} + for category in all_categories: + for gender in ['M', 'F']: + products_cat = products_df.loc[products_df.category==category] + products_cat = products_cat.loc[ + products_cat.gender_affinity.isnull()|(products_cat.gender_affinity==gender)].id.values + # We ensure that all products have PRODUCT_AFFINITY_N products that lead into it + # and PRODUCT_AFFINITY_N products it leads to + affinity_matrix = sum([np.roll(np.identity(len(products_cat)), [0, i], [0, 1]) + for i in range(PRODUCT_AFFINITY_N)]) + np.random.shuffle(affinity_matrix) + affinity_matrix = affinity_matrix.T + np.random.shuffle(affinity_matrix) + affinity_matrix = affinity_matrix.astype(bool) # use as boolean index + affinity_matrix = affinity_matrix | np.identity(len(products_cat), dtype=bool) + + product_infinities = [products_cat[row] for row in affinity_matrix] + product_affinities_bycatgender[(category, gender)] = { + products_cat[i]: products_df.loc[products_df.id.isin(product_infinities[i])] + for i in range(len(products_cat))} + + user_category_to_first_prod = {} + + while interactions < min_interactions: + if (time.time() > next_update_progress): + rate = interactions / (time.time() - start_time_progress) + to_go = (min_interactions - interactions) / rate + print('Generated {} interactions so far (about {} seconds to go)'.format(interactions, int(to_go))) + next_update_progress += PROGRESS_MONITOR_SECONDS_UPDATE + + # Pick a random user + user = users_df.loc[random.randint(0, users_df.shape[0] - 1)] + + # Determine category affinity from user's persona + persona = user['persona'] + # If user persona has sub-categories, we will use those sub-categories to find products for users to partake + # in interactions with. Otehrwise, we will use the high-level categories. + has_subcategories = ':' in user['persona'] + preferred_categories_and_subcats = persona.split('_') + preferred_highlevel_categories = [catstring.split(':')[0] for catstring in preferred_categories_and_subcats] + # preferred_styles = [catstring.split(':')[1] for catstring in preferred_categories_and_subcats] + + p_normalised = (category_affinity_probs * category_frequencies[preferred_highlevel_categories].values) + p_normalised /= p_normalised.sum() + p = NORMALISE_PER_PRODUCT_WEIGHT * p_normalised + (1-NORMALISE_PER_PRODUCT_WEIGHT) * category_affinity_probs + + # Select category based on weighted preference of category order. + chosen_category_ind = np.random.choice(list(range(len(preferred_categories_and_subcats))), 1, p=p)[0] + category = preferred_highlevel_categories[chosen_category_ind] + #category_and_subcat = np.random.choice(preferred_categories_and_subcats, 1, p=p)[0] + + discount_persona = user['discount_persona'] + + gender = user['gender'] + + if has_subcategories: + # if there is a preferred style we choose from those products with this style and category + # but we ignore gender. + # We also do not attempt to keep balance across categories. + style = preferred_categories_and_subcats[chosen_category_ind].split(':')[1] + cachekey = ('category-style', category, style) + prods_subset_df = subsets_cache.get(cachekey) + + if prods_subset_df is None: + # Select products from selected category without gender affinity or that match user's gender + prods_subset_df = products_df.loc[(products_df['category']==category) & + (products_df['style']==style)] + # Update cache for quicker lookup next time + subsets_cache[cachekey] = prods_subset_df + else: + # We are only going to use the machinery to keep things balanced + # if there is no style appointed on the user preferences. + # Here, in order to keep the number of products that are related to a product, + # we restrict the size of the set of products that are recommended to an individual + # user - in effect, the available subset for a particular category/gender + # depends on the first product selected, which is selected as per previous logic + # (looking at category affinities and gender) + usercat_key = (user['id'], category) # has this user already selected a "first" product? + if usercat_key in user_category_to_first_prod: + # If a first product is already selected, we use the product affinities for that product + # To provide the list of products to select from + first_prod = user_category_to_first_prod[usercat_key] + prods_subset_df = product_affinities_bycatgender[(category, gender)][first_prod] + + if not usercat_key in user_category_to_first_prod: + # If the user has not yet selected a first product for this category + # we do it by choosing between all products for gender. + # First, check if subset data frame is already cached for category & gender + cachekey = ('category-gender', category, gender) + prods_subset_df = subsets_cache.get(cachekey) if prods_subset_df is None: # Select products from selected category without gender affinity or that match user's gender - prods_subset_df = products_df.loc[(products_df['category']==category) & - (products_df['style']==style)] - # Update cache for quicker lookup next time + prods_subset_df = products_df.loc[(products_df['category'] == category) & ( + (products_df['gender_affinity'] == gender) | (products_df['gender_affinity'].isnull()))] + # Update cache subsets_cache[cachekey] = prods_subset_df - else: - # We are only going to use the machinery to keep things balanced - # if there is no style appointed on the user preferences. - # Here, in order to keep the number of products that are related to a product, - # we restrict the size of the set of products that are recommended to an individual - # user - in effect, the available subset for a particular category/gender - # depends on the first product selected, which is selected as per previous logic - # (looking at category affinities and gender) - usercat_key = (user['id'], category) # has this user already selected a "first" product? - if usercat_key in user_category_to_first_prod: - # If a first product is already selected, we use the product affinities for that product - # To provide the list of products to select from - first_prod = user_category_to_first_prod[usercat_key] - prods_subset_df = product_affinities_bycatgender[(category, gender)][first_prod] - - if not usercat_key in user_category_to_first_prod: - # If the user has not yet selected a first product for this category - # we do it by choosing between all products for gender. - - # First, check if subset data frame is already cached for category & gender - cachekey = ('category-gender', category, gender) - prods_subset_df = subsets_cache.get(cachekey) - if prods_subset_df is None: - # Select products from selected category without gender affinity or that match user's gender - prods_subset_df = products_df.loc[(products_df['category'] == category) & ( - (products_df['gender_affinity'] == gender) | (products_df['gender_affinity'].isnull()))] - # Update cache - subsets_cache[cachekey] = prods_subset_df - - # Pick a random product from gender filtered subset - product = prods_subset_df.sample().iloc[0] - - interaction_product_counts[product.id] += 1 - - user_to_product[user['id']].add(product['id']) - if not usercat_key in user_category_to_first_prod: - user_category_to_first_prod[usercat_key] = product['id'] + # Pick a random product from gender filtered subset + product = prods_subset_df.sample().iloc[0] - # Decide if the product the user is interacting with is discounted - if discount_persona == 'discount_indifferent': - discounted = random.random() < DISCOUNT_PROBABILITY - elif discount_persona == 'all_discounts': + interaction_product_counts[product.id] += 1 + + user_to_product[user['id']].add(product['id']) + + if not usercat_key in user_category_to_first_prod: + user_category_to_first_prod[usercat_key] = product['id'] + + # Decide if the product the user is interacting with is discounted + if discount_persona == 'discount_indifferent': + discounted = random.random() < DISCOUNT_PROBABILITY + elif discount_persona == 'all_discounts': + discounted = random.random() < DISCOUNT_PROBABILITY_WITH_PREFERENCE + elif discount_persona == 'lower_priced_products': + if product.price < average_product_price: discounted = random.random() < DISCOUNT_PROBABILITY_WITH_PREFERENCE - elif discount_persona == 'lower_priced_products': - if product.price < average_product_price: - discounted = random.random() < DISCOUNT_PROBABILITY_WITH_PREFERENCE - else: - discounted = random.random() < DISCOUNT_PROBABILITY else: - raise ValueError(f'Unable to handle discount persona: {discount_persona}') - - num_interaction_sets_to_insert = 1 - prodcnts = list(interaction_product_counts.values()) - prodcnts_max = max(prodcnts) if len(prodcnts) > 0 else 0 - prodcnts_min = min(prodcnts) if len(prodcnts) > 0 else 0 - prodcnts_avg = sum(prodcnts)/len(prodcnts) if len(prodcnts) > 0 else 0 - if interaction_product_counts[product.id] * 2 < prodcnts_max: - num_interaction_sets_to_insert += 1 - if interaction_product_counts[product.id] < prodcnts_avg: - num_interaction_sets_to_insert += 1 - if interaction_product_counts[product.id] == prodcnts_min: - num_interaction_sets_to_insert += 1 - - for _ in range(num_interaction_sets_to_insert): - - discount_context = 'Yes' if discounted else 'No' - - this_timestamp = next_timestamp + random.randint(1, seconds_increment) - f.writerow([product['id'], - user['id'], - 'View', - this_timestamp, - discount_context]) - - next_timestamp += seconds_increment - product_viewed_count += 1 + discounted = random.random() < DISCOUNT_PROBABILITY + else: + raise ValueError(f'Unable to handle discount persona: {discount_persona}') + + num_interaction_sets_to_insert = 1 + prodcnts = list(interaction_product_counts.values()) + prodcnts_max = max(prodcnts) if len(prodcnts) > 0 else 0 + prodcnts_min = min(prodcnts) if len(prodcnts) > 0 else 0 + prodcnts_avg = sum(prodcnts)/len(prodcnts) if len(prodcnts) > 0 else 0 + if interaction_product_counts[product.id] * 2 < prodcnts_max: + num_interaction_sets_to_insert += 1 + if interaction_product_counts[product.id] < prodcnts_avg: + num_interaction_sets_to_insert += 1 + if interaction_product_counts[product.id] == prodcnts_min: + num_interaction_sets_to_insert += 1 + + for _ in range(num_interaction_sets_to_insert): + + discount_context = 'Yes' if discounted else 'No' + + this_timestamp = next_timestamp + random.randint(1, seconds_increment) + ow.event('View', + this_timestamp, + user, + product=product, + discount_context=discount_context) + + next_timestamp += seconds_increment + product_viewed_count += 1 + interactions += 1 + + if discounted: + discounted_product_viewed_count += 1 + + if product_added_count < int(product_viewed_count * product_added_percent): + this_timestamp += random.randint(1, int(seconds_increment / 2)) + ow.event('AddToCart', + this_timestamp, + user, + product=product, + discount_context=discount_context) interactions += 1 + product_added_count += 1 if discounted: - discounted_product_viewed_count += 1 - - if product_added_count < int(product_viewed_count * product_added_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) - f.writerow([product['id'], - user['id'], - 'AddToCart', - this_timestamp, - discount_context]) - interactions += 1 - product_added_count += 1 - - if discounted: - discounted_product_added_count += 1 - - if cart_viewed_count < int(product_viewed_count * cart_viewed_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) - f.writerow([product['id'], - user['id'], - 'ViewCart', - this_timestamp, - discount_context]) - interactions += 1 - cart_viewed_count += 1 - if discounted: - discounted_cart_viewed_count += 1 - - if checkout_started_count < int(product_viewed_count * checkout_started_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) - f.writerow([product['id'], - user['id'], - 'StartCheckout', - this_timestamp, - discount_context]) - interactions += 1 - checkout_started_count += 1 - if discounted: - discounted_checkout_started_count += 1 - - if order_completed_count < int(product_viewed_count * order_completed_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) - f.writerow([product['id'], - user['id'], - 'Purchase', - this_timestamp, - discount_context]) - interactions += 1 - order_completed_count += 1 - if discounted: - discounted_order_completed_count += 1 + discounted_product_added_count += 1 + + if cart_viewed_count < int(product_viewed_count * cart_viewed_percent): + this_timestamp += random.randint(1, int(seconds_increment / 2)) + ow.event('ViewCart', + this_timestamp, + user, + product=product, + discount_context=discount_context) + interactions += 1 + cart_viewed_count += 1 + if discounted: + discounted_cart_viewed_count += 1 + + if checkout_started_count < int(product_viewed_count * checkout_started_percent): + this_timestamp += random.randint(1, int(seconds_increment / 2)) + ow.event('StartCheckout', + this_timestamp, + user, + product=product, + discount_context=discount_context) + interactions += 1 + checkout_started_count += 1 + if discounted: + discounted_checkout_started_count += 1 + + if order_completed_count < int(product_viewed_count * order_completed_percent): + this_timestamp += random.randint(1, int(seconds_increment / 2)) + ow.event('Purchase', + this_timestamp, + user, + product=product, + discount_context=discount_context) + interactions += 1 + order_completed_count += 1 + if discounted: + discounted_order_completed_count += 1 print("Interactions generation done.") print(f"Total interactions: {interactions}") From 15768bb8aea1cf3508f25cfed45d0915ebdd0072 Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 15 Sep 2022 14:40:48 -0700 Subject: [PATCH 02/45] cleaned up errors + data quality --- .../generate_interactions_personalize.py | 97 +++++++++++-------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index 636cb30af..021072042 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -26,6 +26,7 @@ import random import yaml import logging +import uuid from collections import defaultdict # Keep things deterministic @@ -33,6 +34,17 @@ # Where to put the generated data so that it is picked up by stage.sh GENERATED_DATA_ROOT = "src/aws-lambda/personalize-pre-create-resources/data" +IN_PRODUCTS_FILENAME = "src/products/src/products-service/data/products.yaml" +IN_USERS_FILENAMES = ["src/users/src/users-service/data/users.json.gz", + "src/users/src/users-service/data/cstore_users.json.gz"] + +# This is where stage.sh will pick up input data files +out_items_filename = f"{GENERATED_DATA_ROOT}/items.csv" +out_users_filename = f"{GENERATED_DATA_ROOT}/users.csv" + +# These are output file definitions +personalize_interactions_file = f"{GENERATED_DATA_ROOT}/interactions.csv" +amplitude_interactions_file = f"{GENERATED_DATA_ROOT}/amplitude/interactions.json" # Interactions will be generated between these dates @@ -60,23 +72,13 @@ DISCOUNT_PROBABILITY = 0.2 DISCOUNT_PROBABILITY_WITH_PREFERENCE = 0.5 -IN_PRODUCTS_FILENAME = "src/products/src/products-service/data/products.yaml" -IN_USERS_FILENAMES = ["src/users/src/users-service/data/users.json.gz", - "src/users/src/users-service/data/cstore_users.json.gz"] - PROGRESS_MONITOR_SECONDS_UPDATE = 30 - GENDER_ANY = 'Any' -# This is where stage.sh will pick them up from -out_items_filename = f"{GENERATED_DATA_ROOT}/items.csv" -out_users_filename = f"{GENERATED_DATA_ROOT}/users.csv" -out_interactions_filename = f"{GENERATED_DATA_ROOT}/interactions.csv" - # The meaning of the below constants is described in the relevant notebook. # Minimum number of interactions to generate -min_interactions = 675000 +min_interactions = 675000 # Percentages of each event type to generate product_added_percent = .08 @@ -84,6 +86,20 @@ checkout_started_percent = .02 order_completed_percent = .01 +class OutputWriter: + def __init__(self): + # Make sure paths exist + Path(personalize_interactions_file).parents[0].mkdir(parents=True, exist_ok=True) + Path(amplitude_interactions_file).parents[0].mkdir(parents=True, exist_ok=True) + # Create the writers + self.personalize_writer = PersonalizeOutputFileWriter(personalize_interactions_file) + self.amplitude_writer = AmplitudeOutputFileWriter(amplitude_interactions_file) + print(f'Writing interactions to: {personalize_interactions_file}, {amplitude_interactions_file}') + + def event(self, event_name, timestamp, user, **properties): + self.personalize_writer.event(event_name, timestamp, properties['product']['id'], user['id'], properties['discount_context']) + self.amplitude_writer.event(event_name, timestamp, user["id"], user['name'], user['age'], user['gender'], user['persona'], properties['product']) + class PersonalizeOutputFileWriter: def __init__(self, file_name): f = open(file_name, 'w') @@ -91,31 +107,43 @@ def __init__(self, file_name): # Write the header row self.csv_file.writerow(["ITEM_ID", "USER_ID", "EVENT_TYPE", "TIMESTAMP", "DISCOUNT"]) - def event(self, event_name, timestamp, user, **properties): - self.csv_file.writerow([properties['product']['id'], - user['id'], + def event(self, event_name, timestamp, product_id, user_id, discount_context): + self.csv_file.writerow([product_id, + user_id, event_name, timestamp, - properties['discount_context']]) + discount_context]) + +def np_encoder(object): + if isinstance(object, np.generic): + return object.item() class AmplitudeOutputFileWriter: def __init__(self, file_name): self.file = open(file_name, 'w') - def event(self, event_name, timestamp, user, **properties): - user_id = f'{user["id"]:0>5}' + def event(self, event_name, timestamp, user_id, user_name, user_age, user_gender, user_persona, product): + user_id_str = f'{user_id:0>5}' amplitude_event = { - 'event_type': event_name, - 'time': timestamp * 1000, # Amplitude wants time in ms since the epoch - 'user_id': user_id, - 'user_properties': { - 'name': user['name'], - 'age': user['age'], - 'gender': user['gender'], - 'persona': user['persona'] + "event_type": event_name, + "time": int(timestamp * 1000), # Amplitude wants time in ms since the epoch, we have sec + "user_id": user_id_str, # Amplitude wants a UID as a string with no less than 5 chars + "insert_id": str(uuid.uuid4()), # This is to prevent duplicates when re-running event gen scripts + "event_properties": { + "product_id": product["id"], + "product_price": product["price"], + "product_category": product["category"], + "product_description": product["description"], + "product_style": product["style"] + }, + "user_properties": { + "name": user_name, + "age": user_age, + "gender": user_gender, + "persona": user_persona } } - self.file.write(f'{amplitude_event}\n') # write this in Amplitude JSON event format for S3 import + self.file.write(f'{json.dumps(amplitude_event, default=np_encoder)}\n') # write this in Amplitude JSON event format for S3 import def generate_user_items(out_users_filename, out_items_filename, in_users_filenames, in_products_filename): @@ -156,7 +184,7 @@ def generate_user_items(out_users_filename, out_items_filename, in_users_filenam return users_df, products_df -def generate_interactions(out_interactions_filename, users_df, products_df): +def generate_interactions(users_df, products_df): """Generate items.csv, users.csv from users and product dataframes makes interactions.csv by simulating some shopping behaviour.""" @@ -172,8 +200,6 @@ def generate_interactions(out_interactions_filename, users_df, products_df): order_completed_count = 0 discounted_order_completed_count = 0 - Path(out_interactions_filename).parents[0].mkdir(parents=True, exist_ok=True) - # ensure determinism random.seed(RANDOM_SEED) np.random.seed(RANDOM_SEED) @@ -193,7 +219,7 @@ def generate_interactions(out_interactions_filename, users_df, products_df): time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_timestamp)))) print('Seconds increment: {}'.format(seconds_increment)) - print("Generating interactions... (this may take a few minutes)") + print("Generating interactions... (this will take a few minutes)") interactions = 0 subsets_cache = {} @@ -202,13 +228,8 @@ def generate_interactions(out_interactions_filename, users_df, products_df): category_affinity_probs = np.array(CATEGORY_AFFINITY_PROBS) - print("Writing interactions to: {}".format(out_interactions_filename)) - - ############################################################################### - # TODO: ADD A CMD LINE OPTION TO CONTROL THIS - # Create a file writer - ow = AmplitudeOutputFileWriter(out_interactions_filename) + ow = OutputWriter() category_frequencies = products_df.category.value_counts() category_frequencies /= sum(category_frequencies.values) @@ -249,7 +270,7 @@ def generate_interactions(out_interactions_filename, users_df, products_df): if (time.time() > next_update_progress): rate = interactions / (time.time() - start_time_progress) to_go = (min_interactions - interactions) / rate - print('Generated {} interactions so far (about {} seconds to go)'.format(interactions, int(to_go))) + print(f'Generated {interactions} interactions so far (about {int(to_go/60)} minutes to go)') next_update_progress += PROGRESS_MONITOR_SECONDS_UPDATE # Pick a random user @@ -437,4 +458,4 @@ def generate_interactions(out_interactions_filename, users_df, products_df): logging.basicConfig(level=logging.INFO) users_df, products_df = generate_user_items(out_users_filename, out_items_filename, IN_USERS_FILENAMES, IN_PRODUCTS_FILENAME) - generate_interactions(out_interactions_filename, users_df, products_df) + generate_interactions(users_df, products_df) From d526c198ee931aa47483745897683088008b5acf Mon Sep 17 00:00:00 2001 From: Igor Date: Sat, 17 Sep 2022 10:53:12 -0700 Subject: [PATCH 03/45] fixed time compression issue in generator --- .../generate_interactions_personalize.py | 28 ++++---- generators/send_to_amplitude.py | 66 +++++++++++++++++++ 2 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 generators/send_to_amplitude.py diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index 021072042..7ef6f5139 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -78,7 +78,7 @@ # The meaning of the below constants is described in the relevant notebook. # Minimum number of interactions to generate -min_interactions = 675000 +min_interactions = 675000 # Percentages of each event type to generate product_added_percent = .08 @@ -214,10 +214,10 @@ def generate_interactions(users_df, products_df): if seconds_increment <= 0: raise AssertionError(f"Should never happen: {seconds_increment} <= 0") - print('Minimum interactions to generate: {}'.format(min_interactions)) - print('Starting timestamp: {} ({})'.format(next_timestamp, - time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_timestamp)))) - print('Seconds increment: {}'.format(seconds_increment)) + print(f'Minimum interactions to generate: {min_interactions}') + print(f'Starting timestamp: ({time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(next_timestamp))})') + print(f'Ending timestamp: ({time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(LAST_TIMESTAMP))})') + print(f'Seconds increment: {seconds_increment}') print("Generating interactions... (this will take a few minutes)") interactions = 0 @@ -380,7 +380,7 @@ def generate_interactions(users_df, products_df): discount_context = 'Yes' if discounted else 'No' - this_timestamp = next_timestamp + random.randint(1, seconds_increment) + this_timestamp = next_timestamp + random.randint(int(seconds_increment), int(seconds_increment * 2)) ow.event('View', this_timestamp, user, @@ -395,50 +395,50 @@ def generate_interactions(users_df, products_df): discounted_product_viewed_count += 1 if product_added_count < int(product_viewed_count * product_added_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) + this_timestamp += random.randint(int(seconds_increment), int(seconds_increment * 2)) ow.event('AddToCart', this_timestamp, user, product=product, discount_context=discount_context) - interactions += 1 + #interactions += 1 product_added_count += 1 if discounted: discounted_product_added_count += 1 if cart_viewed_count < int(product_viewed_count * cart_viewed_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) + this_timestamp += random.randint(int(seconds_increment), int(seconds_increment * 2)) ow.event('ViewCart', this_timestamp, user, product=product, discount_context=discount_context) - interactions += 1 + #interactions += 1 cart_viewed_count += 1 if discounted: discounted_cart_viewed_count += 1 if checkout_started_count < int(product_viewed_count * checkout_started_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) + this_timestamp += random.randint(int(seconds_increment), int(seconds_increment * 2)) ow.event('StartCheckout', this_timestamp, user, product=product, discount_context=discount_context) - interactions += 1 + #interactions += 1 checkout_started_count += 1 if discounted: discounted_checkout_started_count += 1 if order_completed_count < int(product_viewed_count * order_completed_percent): - this_timestamp += random.randint(1, int(seconds_increment / 2)) + this_timestamp += random.randint(int(seconds_increment), int(seconds_increment * 2)) ow.event('Purchase', this_timestamp, user, product=product, discount_context=discount_context) - interactions += 1 + #interactions += 1 order_completed_count += 1 if discounted: discounted_order_completed_count += 1 diff --git a/generators/send_to_amplitude.py b/generators/send_to_amplitude.py new file mode 100644 index 000000000..3b56942ca --- /dev/null +++ b/generators/send_to_amplitude.py @@ -0,0 +1,66 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import datagenerator +import json +import requests +import yaml + +# Amplitude event support +# This follows the Amplitude V2 HTTP Bulk API spec, here: +# https://help.amplitude.com/hc/en-us/articles/360032842391-HTTP-API-V2 +# +# These classes accept a user, platform, and general event properties and map them +# into an Amplitude API compatible represenation. + + +amplitude_interactions_file = 'src/aws-lambda/personalize-pre-create-resources/data/amplitude/interactions.json' +amplitude_key = 'f6cdf64a6db24778bb9ce82188c19a95' + +class AmplitudeSender: + def __init__(self, config): + self.config = config # MUST BE: { 'api_key': } + self.endpoint = 'https://api.amplitude.com/2/httpapi' + + def send_batch(self, events, debug=False): + batch_events = { + "api_key": self.config['api_key'], + "events": events + } + + events_str = json.dumps(batch_events, default=lambda x: x.__dict__) + print(f'Batch length bytes: {len(events_str)}') + if debug: + parsed = json.loads(events_str) + #print(f'{json.dumps(parsed, indent=4)}') + response = None + else: + response = requests.post(self.endpoint, + data=events_str) + #print(self.config_keys[platform]) + #print(json.dumps(batch_events, default=lambda x: x.__dict__)) + print(f'Sent {len(batch_events["events"])} events and got {response}') + return response + +if __name__ == '__main__': + with open(amplitude_interactions_file, 'r') as events_file: + sender = AmplitudeSender( { 'api_key': amplitude_key }) + + total_event_count = 0 + send_event_count = 0 + events = [] + + for event in events_file: + events.append(json.loads(event)) + send_event_count += 1 + total_event_count += 1 + if send_event_count == 1000: + sender.send_batch(events) + print(f'sending {len(events)} events') + events = [] + send_event_count = 0 + + + if len(events) > 0: + sender.send_batch(events) + print(f'sending {len(events)} events') \ No newline at end of file From 4f04072cdb0a37c7f97bb8fa1fd86bd7676c07e5 Mon Sep 17 00:00:00 2001 From: Igor Date: Sat, 17 Sep 2022 10:58:51 -0700 Subject: [PATCH 04/45] removed comments --- generators/generate_interactions_personalize.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index 7ef6f5139..ceb14d8d3 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -401,7 +401,6 @@ def generate_interactions(users_df, products_df): user, product=product, discount_context=discount_context) - #interactions += 1 product_added_count += 1 if discounted: @@ -414,7 +413,6 @@ def generate_interactions(users_df, products_df): user, product=product, discount_context=discount_context) - #interactions += 1 cart_viewed_count += 1 if discounted: discounted_cart_viewed_count += 1 @@ -426,7 +424,6 @@ def generate_interactions(users_df, products_df): user, product=product, discount_context=discount_context) - #interactions += 1 checkout_started_count += 1 if discounted: discounted_checkout_started_count += 1 @@ -438,7 +435,6 @@ def generate_interactions(users_df, products_df): user, product=product, discount_context=discount_context) - #interactions += 1 order_completed_count += 1 if discounted: discounted_order_completed_count += 1 From 919ec897af353bbdbd919f87edd83d08511a9097 Mon Sep 17 00:00:00 2001 From: Igor Date: Sat, 17 Sep 2022 11:06:02 -0700 Subject: [PATCH 05/45] minutes back to seconds for gen time --- generators/generate_interactions_personalize.py | 2 +- generators/send_to_amplitude.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index ceb14d8d3..b8f01b284 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -270,7 +270,7 @@ def generate_interactions(users_df, products_df): if (time.time() > next_update_progress): rate = interactions / (time.time() - start_time_progress) to_go = (min_interactions - interactions) / rate - print(f'Generated {interactions} interactions so far (about {int(to_go/60)} minutes to go)') + print(f'Generated {interactions} interactions so far (about {int(to_go)} seconds to go)') next_update_progress += PROGRESS_MONITOR_SECONDS_UPDATE # Pick a random user diff --git a/generators/send_to_amplitude.py b/generators/send_to_amplitude.py index 3b56942ca..e817739ce 100644 --- a/generators/send_to_amplitude.py +++ b/generators/send_to_amplitude.py @@ -1,10 +1,8 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: MIT-0 -import datagenerator import json import requests -import yaml # Amplitude event support # This follows the Amplitude V2 HTTP Bulk API spec, here: @@ -13,7 +11,6 @@ # These classes accept a user, platform, and general event properties and map them # into an Amplitude API compatible represenation. - amplitude_interactions_file = 'src/aws-lambda/personalize-pre-create-resources/data/amplitude/interactions.json' amplitude_key = 'f6cdf64a6db24778bb9ce82188c19a95' From feb717a23e94ea2cbbee444bf7926e7217fa23c9 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 18 Sep 2022 12:36:27 -0700 Subject: [PATCH 06/45] added check to timestamp duration --- generators/generate_interactions_personalize.py | 14 +++++++++++--- .../src/repositories/recommendationsRepository.js | 3 +++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index b8f01b284..707502633 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -114,6 +114,7 @@ def event(self, event_name, timestamp, product_id, user_id, discount_context): timestamp, discount_context]) +# This is a hack to get around numpy nonstandard type serialization to JSON def np_encoder(object): if isinstance(object, np.generic): return object.item() @@ -215,8 +216,8 @@ def generate_interactions(users_df, products_df): if seconds_increment <= 0: raise AssertionError(f"Should never happen: {seconds_increment} <= 0") print(f'Minimum interactions to generate: {min_interactions}') - print(f'Starting timestamp: ({time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(next_timestamp))})') - print(f'Ending timestamp: ({time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(LAST_TIMESTAMP))})') + print(f'Starting timestamp: {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(next_timestamp))}') + print(f'Ending timestamp: {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(LAST_TIMESTAMP))}') print(f'Seconds increment: {seconds_increment}') print("Generating interactions... (this will take a few minutes)") @@ -266,7 +267,7 @@ def generate_interactions(users_df, products_df): user_category_to_first_prod = {} - while interactions < min_interactions: + while interactions < min_interactions or next_timestamp < LAST_TIMESTAMP: if (time.time() > next_update_progress): rate = interactions / (time.time() - start_time_progress) to_go = (min_interactions - interactions) / rate @@ -402,6 +403,7 @@ def generate_interactions(users_df, products_df): product=product, discount_context=discount_context) product_added_count += 1 + interactions += 1 if discounted: discounted_product_added_count += 1 @@ -414,6 +416,8 @@ def generate_interactions(users_df, products_df): product=product, discount_context=discount_context) cart_viewed_count += 1 + interactions += 1 + if discounted: discounted_cart_viewed_count += 1 @@ -425,6 +429,8 @@ def generate_interactions(users_df, products_df): product=product, discount_context=discount_context) checkout_started_count += 1 + interactions += 1 + if discounted: discounted_checkout_started_count += 1 @@ -436,6 +442,8 @@ def generate_interactions(users_df, products_df): product=product, discount_context=discount_context) order_completed_count += 1 + interactions += 1 + if discounted: discounted_order_completed_count += 1 diff --git a/src/web-ui/src/repositories/recommendationsRepository.js b/src/web-ui/src/repositories/recommendationsRepository.js index d8a044e6f..212a20680 100644 --- a/src/web-ui/src/repositories/recommendationsRepository.js +++ b/src/web-ui/src/repositories/recommendationsRepository.js @@ -44,6 +44,9 @@ export default { return connection.get(related, { params: params }) }, + + // TODO: Add Amplitude Profile API switch here + getRecommendationsForUser(userID, currentItemID, numResults, feature) { let params = { userID: userID, From 00062855138408ea8eb717098456051342ee27ab Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 22 Sep 2022 13:16:55 -0700 Subject: [PATCH 07/45] cleaned up amplitude api keys --- aws/cloudformation-templates/base/ssm.yaml | 2 +- .../services/service/_template.yaml | 1 - .../services/service/pipeline.yaml | 7 ------- aws/cloudformation-templates/web-ui-pipeline.yaml | 7 ------- src/web-ui/buildspec.yml | 1 + 5 files changed, 2 insertions(+), 16 deletions(-) diff --git a/aws/cloudformation-templates/base/ssm.yaml b/aws/cloudformation-templates/base/ssm.yaml index cf88aeb77..e95fe43d4 100644 --- a/aws/cloudformation-templates/base/ssm.yaml +++ b/aws/cloudformation-templates/base/ssm.yaml @@ -180,7 +180,7 @@ Resources: ParameterAmplitudeApiKey: Type: "AWS::SSM::Parameter" Properties: - Name: "retaildemostore-amplitude-api-key" + Name: "/retaildemostore/webui/amplitude_api_key" Type: "String" Value: !If [HasAmplitudeApiKey, !Ref AmplitudeApiKey, "NONE"] Description: "Retail Demo Store Amplitude API key" diff --git a/aws/cloudformation-templates/services/service/_template.yaml b/aws/cloudformation-templates/services/service/_template.yaml index c31c92995..a37730596 100644 --- a/aws/cloudformation-templates/services/service/_template.yaml +++ b/aws/cloudformation-templates/services/service/_template.yaml @@ -257,7 +257,6 @@ Resources: SearchServiceExternalUrl: !Ref SearchServiceExternalUrl PinpointAppId: !Ref PinpointAppId ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !Ref DeleteRepositoryLambdaArn diff --git a/aws/cloudformation-templates/services/service/pipeline.yaml b/aws/cloudformation-templates/services/service/pipeline.yaml index 35cf10b6e..259874211 100644 --- a/aws/cloudformation-templates/services/service/pipeline.yaml +++ b/aws/cloudformation-templates/services/service/pipeline.yaml @@ -86,10 +86,6 @@ Parameters: ParameterPersonalizeEventTrackerId: Type: String - ParameterAmplitudeApiKey: - Type: String - Description: SSM parameter name for the Amplitude API key - ParameterOptimizelySdkKey: Type: String Description: SSM parameter name for the Optimizely SDK key @@ -320,9 +316,6 @@ Resources: - Name: PERSONALIZE_TRACKING_ID Type: PARAMETER_STORE Value: !Ref ParameterPersonalizeEventTrackerId - - Name: AMPLITUDE_API_KEY - Type: PARAMETER_STORE - Value: !Ref ParameterAmplitudeApiKey - Name: OPTIMIZELY_SDK_KEY Type: PARAMETER_STORE Value: !Ref ParameterOptimizelySdkKey diff --git a/aws/cloudformation-templates/web-ui-pipeline.yaml b/aws/cloudformation-templates/web-ui-pipeline.yaml index a7b991287..8bc9e3a79 100644 --- a/aws/cloudformation-templates/web-ui-pipeline.yaml +++ b/aws/cloudformation-templates/web-ui-pipeline.yaml @@ -118,10 +118,6 @@ Parameters: ParameterPersonalizeEventTrackerId: Type: String - ParameterAmplitudeApiKey: - Type: String - Description: SSM parameter name for the Amplitude API key parameter name - ParameterOptimizelySdkKey: Type: String Description: SSM Parameter name for the Optimizely SDK key parameter name @@ -433,9 +429,6 @@ Resources: - Name: PERSONALIZE_TRACKING_ID Type: PARAMETER_STORE Value: !Ref ParameterPersonalizeEventTrackerId - - Name: AMPLITUDE_API_KEY - Type: PARAMETER_STORE - Value: !Ref ParameterAmplitudeApiKey - Name: OPTIMIZELY_SDK_KEY Type: PARAMETER_STORE Value: !Ref ParameterOptimizelySdkKey diff --git a/src/web-ui/buildspec.yml b/src/web-ui/buildspec.yml index 875bdf999..a5d50836b 100644 --- a/src/web-ui/buildspec.yml +++ b/src/web-ui/buildspec.yml @@ -4,6 +4,7 @@ env: parameter-store: MPARTICLE_API_KEY: "/retaildemostore/webui/mparticle_api_key" MPARTICLE_SECRET_KEY: "/retaildemostore/webui/mparticle_secret_key" + AMPLITUDE_API_KEY: "/retaildemostore/webui/amplitude_api_key" phases: install: From 6a97bb7fa2340b873f31ba1eb54d28b95f37e126 Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 22 Sep 2022 16:57:21 -0700 Subject: [PATCH 08/45] removed amplitude api key --- .../services/_template.yaml | 13 ------------- aws/cloudformation-templates/template.yaml | 2 -- 2 files changed, 15 deletions(-) diff --git a/aws/cloudformation-templates/services/_template.yaml b/aws/cloudformation-templates/services/_template.yaml index d7d0591e3..3bef72eda 100644 --- a/aws/cloudformation-templates/services/_template.yaml +++ b/aws/cloudformation-templates/services/_template.yaml @@ -100,10 +100,6 @@ Parameters: Type: String Description: SSM parameter name for the Personalize event tracking ID - ParameterAmplitudeApiKey: - Type: String - Description: SSM parameter name for the Amplitude API key - ParameterOptimizelySdkKey: Type: String Description: SSM parameter name for the Optimizely SDK key @@ -160,7 +156,6 @@ Resources: CategoriesTable: !Ref CategoriesTable ExperimentStrategyTable: !Ref ExperimentStrategyTable ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -193,7 +188,6 @@ Resources: ClusterName: !Ref ClusterName ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey PinpointAppId: !Ref PinpointAppId CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn @@ -227,7 +221,6 @@ Resources: ClusterName: !Ref ClusterName ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -260,7 +253,6 @@ Resources: ClusterName: !Ref ClusterName ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -294,7 +286,6 @@ Resources: ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace EnvOpenSearchDomainEndpoint: !Ref EnvOpenSearchDomainEndpoint ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -328,7 +319,6 @@ Resources: ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace EnvOpenSearchDomainEndpoint: !Ref EnvOpenSearchDomainEndpoint ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -362,7 +352,6 @@ Resources: ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace EnvOpenSearchDomainEndpoint: !Ref EnvOpenSearchDomainEndpoint ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -398,7 +387,6 @@ Resources: CategoriesTable: !Ref CategoriesTable ExperimentStrategyTable: !Ref ExperimentStrategyTable ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn DeleteRepositoryLambdaArn: !GetAtt DeleteRepositoryLambdaFunction.Arn @@ -431,7 +419,6 @@ Resources: ClusterName: !Ref ClusterName ServiceDiscoveryNamespace: !Ref ServiceDiscoveryNamespace ParameterPersonalizeEventTrackerId: !Ref ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !Ref ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !Ref ParameterOptimizelySdkKey ParameterIVSVideoChannelMap: !Ref ParameterIVSVideoChannelMap CleanupBucketLambdaArn: !Ref CleanupBucketLambdaArn diff --git a/aws/cloudformation-templates/template.yaml b/aws/cloudformation-templates/template.yaml index 7d691f6a7..ac8995883 100644 --- a/aws/cloudformation-templates/template.yaml +++ b/aws/cloudformation-templates/template.yaml @@ -554,7 +554,6 @@ Resources: CategoriesTable: !GetAtt Base.Outputs.CategoriesTable ExperimentStrategyTable: !GetAtt Base.Outputs.ExperimentStrategyTable ParameterPersonalizeEventTrackerId: !GetAtt Base.Outputs.ParameterPersonalizeEventTrackerId - ParameterAmplitudeApiKey: !GetAtt Base.Outputs.ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !GetAtt Base.Outputs.ParameterOptimizelySdkKey CleanupBucketLambdaArn: !GetAtt CleanupBucket.Outputs.LambdaFunctionArn ParameterIVSVideoChannelMap: !GetAtt Base.Outputs.ParameterIVSVideoChannelMap @@ -618,7 +617,6 @@ Resources: !GetAtt Location.Outputs.LocationNotificationEndpoint, "NotDeployed", ] - ParameterAmplitudeApiKey: !GetAtt Base.Outputs.ParameterAmplitudeApiKey ParameterOptimizelySdkKey: !GetAtt Base.Outputs.ParameterOptimizelySdkKey ParameterSegmentWriteKey: !GetAtt Base.Outputs.ParameterSegmentWriteKey ParameterGoogleAnalyticsMeasurementId: !GetAtt Base.Outputs.ParameterGoogleAnalyticsMeasurementId From 1f67ad536e31704d288e819e85e7b2b3b84f6612 Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 22 Sep 2022 19:18:24 -0700 Subject: [PATCH 09/45] cast float time to int --- generators/generate_interactions_personalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generators/generate_interactions_personalize.py b/generators/generate_interactions_personalize.py index 707502633..7775a0371 100644 --- a/generators/generate_interactions_personalize.py +++ b/generators/generate_interactions_personalize.py @@ -111,7 +111,7 @@ def event(self, event_name, timestamp, product_id, user_id, discount_context): self.csv_file.writerow([product_id, user_id, event_name, - timestamp, + int(timestamp), discount_context]) # This is a hack to get around numpy nonstandard type serialization to JSON From 5cec7cb00e6c2081593aea66bdb12e6bd2a30e1c Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 08:55:23 -0700 Subject: [PATCH 10/45] testing amplitude user identify --- src/web-ui/src/analytics/AnalyticsHandler.js | 31 +++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index e595f9c52..7f0984ba7 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -235,7 +235,34 @@ export const AnalyticsHandler = { if (this.mParticleEnabled()) { window.mParticle.logEvent('UserSignedIn', window.mParticle.EventType.Transaction, { "method": "Web" }); - } + } + + if (this.amplitudeEnabled()) { + // Amplitude identify call + Amplitude.getInstance().setUserId(user.id); + // Should we be doing this. Need to support case of switching + // users and not getting sessions confused. + Amplitude.getInstance().regenerateDeviceId(); + + var identify = new Amplitude.Identify() + .set('username', user.username) + .set('email', user.email) + .set('firstName', user.first_name) + .set('lastName', user.last_name) + .set('gender', user.gender) + .set('age', user.age) + .set('persona', user.persona) + + if (user.sign_up_date) { + identify.setOnce('signUpDate', user.sign_up_date) + } + + if (user.last_sign_in_date) { + identify.set('lastSignInDate', user.last_sign_in_date) + } + + Amplitude.getInstance().identify(identify) + } } }, @@ -638,6 +665,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + eventProperties['user_id'] = user.id; Amplitude.getInstance().logEvent('View', eventProperties); } @@ -1024,6 +1052,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + eventProperties['user_id'] = user.id Amplitude.getInstance().logEvent('Search', eventProperties); } From d5de489f1c0c995451843c6233037067a6805b79 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 09:07:56 -0700 Subject: [PATCH 11/45] log amp id --- src/web-ui/src/analytics/AnalyticsHandler.js | 30 ++------------------ 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index 7f0984ba7..93d6bc0e0 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -236,33 +236,6 @@ export const AnalyticsHandler = { if (this.mParticleEnabled()) { window.mParticle.logEvent('UserSignedIn', window.mParticle.EventType.Transaction, { "method": "Web" }); } - - if (this.amplitudeEnabled()) { - // Amplitude identify call - Amplitude.getInstance().setUserId(user.id); - // Should we be doing this. Need to support case of switching - // users and not getting sessions confused. - Amplitude.getInstance().regenerateDeviceId(); - - var identify = new Amplitude.Identify() - .set('username', user.username) - .set('email', user.email) - .set('firstName', user.first_name) - .set('lastName', user.last_name) - .set('gender', user.gender) - .set('age', user.age) - .set('persona', user.persona) - - if (user.sign_up_date) { - identify.setOnce('signUpDate', user.sign_up_date) - } - - if (user.last_sign_in_date) { - identify.set('lastSignInDate', user.last_sign_in_date) - } - - Amplitude.getInstance().identify(identify) - } } }, @@ -386,6 +359,9 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + if(user) { + console.log(`Amp User ID: ${user.id}`); + } Amplitude.getInstance().logEvent('AddToCart', eventProperties); } From 97f2d3a056a62d386ea97b64c44a037e5e4f6099 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 09:29:41 -0700 Subject: [PATCH 12/45] generate a correct amp user id to match gen data --- src/web-ui/src/analytics/AnalyticsHandler.js | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index 93d6bc0e0..5d2f0941d 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -33,6 +33,16 @@ export const AnalyticsHandler = { } }, + generateAmplitudeUserId(user) { + if(user) { + console.log(`Amplitude User ID: ${user.id}`); + // Retail Demo Store has a lot of sub-5 digit user IDs this will follow the Amplitude 5 char user ID string spec + return String(user.id).padStart(5, '0'); + } + + return null; + }, + async identify(user) { if (!user) { return Promise.resolve() @@ -359,9 +369,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { - if(user) { - console.log(`Amp User ID: ${user.id}`); - } + eventProperties['user_id'] = this.generateAmplitudeUserId(user); Amplitude.getInstance().logEvent('AddToCart', eventProperties); } From c20d47c75b2245a1aced38695a256f04be4c7b20 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 09:32:55 -0700 Subject: [PATCH 13/45] log amp string instead --- src/web-ui/src/analytics/AnalyticsHandler.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index 5d2f0941d..093fff302 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -35,9 +35,10 @@ export const AnalyticsHandler = { generateAmplitudeUserId(user) { if(user) { - console.log(`Amplitude User ID: ${user.id}`); + ampID = String(user.id).padStart(5, '0'); + console.log(`Amplitude User ID: ${ampID}`); // Retail Demo Store has a lot of sub-5 digit user IDs this will follow the Amplitude 5 char user ID string spec - return String(user.id).padStart(5, '0'); + return ampID; } return null; From 550bfbb636b253457f39b5f5c0feae825f1a63f0 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 09:39:11 -0700 Subject: [PATCH 14/45] define ampID --- src/web-ui/src/analytics/AnalyticsHandler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index 093fff302..02ca46e0f 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -35,7 +35,7 @@ export const AnalyticsHandler = { generateAmplitudeUserId(user) { if(user) { - ampID = String(user.id).padStart(5, '0'); + let ampID = String(user.id).padStart(5, '0'); console.log(`Amplitude User ID: ${ampID}`); // Retail Demo Store has a lot of sub-5 digit user IDs this will follow the Amplitude 5 char user ID string spec return ampID; From 834c8302b1ee88c05b22a60a708c7167b07a3391 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 09:58:57 -0700 Subject: [PATCH 15/45] amplitude identify function --- src/web-ui/src/analytics/AnalyticsHandler.js | 32 +++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index 02ca46e0f..486e6f925 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -38,12 +38,42 @@ export const AnalyticsHandler = { let ampID = String(user.id).padStart(5, '0'); console.log(`Amplitude User ID: ${ampID}`); // Retail Demo Store has a lot of sub-5 digit user IDs this will follow the Amplitude 5 char user ID string spec + // to match data generated by the data generator and the Amplitude API spec return ampID; } return null; }, + amplitudeIdentify(user) { + if(user) { + // Amplitude identify call + Amplitude.getInstance().setUserId(this.generateAmplitudeUserId(user)); + // Should we be doing this. Need to support case of switching + // users and not getting sessions confused. + Amplitude.getInstance().regenerateDeviceId(); + + var identify = new Amplitude.Identify() + .set('username', user.username) + .set('email', user.email) + .set('firstName', user.first_name) + .set('lastName', user.last_name) + .set('gender', user.gender) + .set('age', user.age) + .set('persona', user.persona) + + if (user.sign_up_date) { + identify.setOnce('signUpDate', user.sign_up_date) + } + + if (user.last_sign_in_date) { + identify.set('lastSignInDate', user.last_sign_in_date) + } + + Amplitude.getInstance().identify(identify) + } + }, + async identify(user) { if (!user) { return Promise.resolve() @@ -650,7 +680,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { - eventProperties['user_id'] = user.id; + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('View', eventProperties); } From 75e6b899159808d3438480b329635b0258aa60d4 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 11:25:28 -0700 Subject: [PATCH 16/45] add identify; add required env params --- .../base/_template.yaml | 12 ++++++++ aws/cloudformation-templates/base/ssm.yaml | 30 +++++++++++++++++++ aws/cloudformation-templates/template.yaml | 18 +++++++++++ src/web-ui/src/analytics/AnalyticsHandler.js | 14 ++++++--- src/web-ui/src/public/Main.vue | 3 ++ 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/aws/cloudformation-templates/base/_template.yaml b/aws/cloudformation-templates/base/_template.yaml index 5944b0ac0..740d7ad1a 100644 --- a/aws/cloudformation-templates/base/_template.yaml +++ b/aws/cloudformation-templates/base/_template.yaml @@ -43,6 +43,16 @@ Parameters: Description: Amplitude API key for product analytics and A/B testing results (optional). NoEcho: true + AmplitudeSecretApiKey: + Type: String + Description: Amplitude secret API key from you project - this is used for the Profile API + NoEcho: true + + AmplitudeRecommendationID: + Type: String + Description: Amplitude Receommender ID from the Recommender Details page + NoEcho: false + ParentStackName: Type: String Description: Parent stack name @@ -269,6 +279,8 @@ Resources: StackBucketName: !GetAtt Buckets.Outputs.StackBucketName ExperimentStrategyTableName: !GetAtt Tables.Outputs.ExperimentStrategyTable AmplitudeApiKey: !Ref AmplitudeApiKey + AmplitudeSecretApiKey: !Ref AmplitudeSecretApiKey + AmplitudeRecommendationID: !Ref AmplitudeRecommendationID OptimizelySdkKey: !Ref OptimizelySdkKey SegmentWriteKey: !Ref SegmentWriteKey mParticleOrgId: !Ref mParticleOrgId diff --git a/aws/cloudformation-templates/base/ssm.yaml b/aws/cloudformation-templates/base/ssm.yaml index e95fe43d4..80fe1ad18 100644 --- a/aws/cloudformation-templates/base/ssm.yaml +++ b/aws/cloudformation-templates/base/ssm.yaml @@ -16,6 +16,16 @@ Parameters: Description: Amplitude API key for product analytics and A/B testing results (optional). NoEcho: true + AmplitudeSecretApiKey: + Type: String + Description: Amplitude secret API key from you project - this is used for the Profile API + NoEcho: true + + AmplitudeRecommendationID: + Type: String + Description: Amplitude Receommender ID from the Recommender Details page + NoEcho: false + OptimizelySdkKey: Type: String Description: Optimizely SDK key for experimentation (optional). @@ -61,6 +71,10 @@ Parameters: Conditions: HasAmplitudeApiKey: !Not [!Equals [!Ref AmplitudeApiKey, ""]] + HasAmplitudeSecretApiKey: + !Not [!Equals [!Ref AmplitudeSecretApiKey, ""]] + HasAmplitudeRecommendationID: + !Not [!Equals [!Ref AmplitudeRecommendationID, ""]] HasOptimizelySdkKey: !Not [!Equals [!Ref OptimizelySdkKey, ""]] HasSegmentWriteKey: @@ -185,6 +199,22 @@ Resources: Value: !If [HasAmplitudeApiKey, !Ref AmplitudeApiKey, "NONE"] Description: "Retail Demo Store Amplitude API key" + ParameterAmplitudeSecretApiKey: + Type: "AWS::SSM::Parameter" + Properties: + Name: "/retaildemostore/webui/amplitude_secret_api_key" + Type: "String" + Value: !If [HasAmplitudeSecretApiKey, !Ref AmplitudeSecretApiKey, "NONE"] + Description: "Retail Demo Store Amplitude secret API key" + + ParameterAmplitudeRecommendationID: + Type: "AWS::SSM::Parameter" + Properties: + Name: "/retaildemostore/webui/amplitude_recommendation_id" + Type: "String" + Value: !If [HasAmplitudeRecommendationID, !Ref AmplitudeRecommendationID, "NONE"] + Description: "Retail Demo Store Amplitude Recommender ID (see Recommender Details page)" + ParameterOptimizelySdkKey: Type: "AWS::SSM::Parameter" Properties: diff --git a/aws/cloudformation-templates/template.yaml b/aws/cloudformation-templates/template.yaml index ac8995883..b1cdfb986 100644 --- a/aws/cloudformation-templates/template.yaml +++ b/aws/cloudformation-templates/template.yaml @@ -59,6 +59,8 @@ Metadata: default: "Optional Integrations" Parameters: - AmplitudeApiKey + - AmplitudeSecretApiKey + - AmplitudeRecommendationID - OptimizelySdkKey - IncludeSegmentDependencies - SegmentWriteKey @@ -117,6 +119,10 @@ Metadata: default: "Deploy personalized offers and pickup notices using Location Services geofencing" AmplitudeApiKey: default: "Amplitude API Key" + AmplitudeSecretApiKey: + default: "Amplitude Project Secret API Key" + AmplitudeRecommendationID: + default: "Amplitude Recommender ID (from the Recommender Details page)" OptimizelySdkKey: default: "Optimizely SDK Key" AmazonPayStoreId: @@ -292,6 +298,16 @@ Parameters: Description: Amplitude API key for integrated product analytics and A/B testing results (optional). NoEcho: true + AmplitudeSecretApiKey: + Type: String + Description: Amplitude secret API key from you project - this is used for the Profile API + NoEcho: true + + AmplitudeRecommendationID: + Type: String + Description: Amplitude Receommender ID from the Recommender Details page + NoEcho: false + OptimizelySdkKey: Type: String Description: Optimizely SDK key for experimentation (optional). @@ -505,6 +521,8 @@ Resources: CreateOpenSearchServiceLinkedRole: !Ref CreateOpenSearchServiceLinkedRole CleanupBucketLambdaArn: !GetAtt CleanupBucket.Outputs.LambdaFunctionArn AmplitudeApiKey: !Ref AmplitudeApiKey + AmplitudeSecretApiKey: !Ref AmplitudeSecretApiKey + AmplitudeRecommendationID: !Ref AmplitudeRecommendationID ParentStackName: !Ref AWS::StackName OptimizelySdkKey: !Ref OptimizelySdkKey SegmentWriteKey: !Ref SegmentWriteKey diff --git a/src/web-ui/src/analytics/AnalyticsHandler.js b/src/web-ui/src/analytics/AnalyticsHandler.js index 486e6f925..33bae4d62 100644 --- a/src/web-ui/src/analytics/AnalyticsHandler.js +++ b/src/web-ui/src/analytics/AnalyticsHandler.js @@ -19,8 +19,8 @@ export const AnalyticsHandler = { clearUser() { if (this.amplitudeEnabled()) { // Update Amplitude user - Amplitude.getInstance().setUserId(null) - Amplitude.getInstance().regenerateDeviceId() + Amplitude.getInstance().setUserId(null); + Amplitude.getInstance().regenerateDeviceId(); } if (this.mParticleEnabled()) { var identityCallback = function() { @@ -400,7 +400,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { - eventProperties['user_id'] = this.generateAmplitudeUserId(user); + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('AddToCart', eventProperties); } @@ -543,6 +543,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('RemoveFromCart', eventProperties); } @@ -617,6 +618,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('UpdateQuantity', eventProperties); } }, @@ -776,6 +778,7 @@ export const AnalyticsHandler = { if (this.amplitudeEnabled()) { // Amplitude event + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('ViewCart', eventProperties); } @@ -863,6 +866,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('StartCheckout', eventProperties); } @@ -936,6 +940,7 @@ export const AnalyticsHandler = { if (this.amplitudeEnabled()) { // Amplitude revenue + this.amplitudeIdentify(user); let revenue = new Amplitude.Revenue() .setProductId(orderItem.product_id.toString()) .setPrice(+orderItem.price.toFixed(2)) @@ -998,6 +1003,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('Purchase', eventProperties); } @@ -1067,7 +1073,7 @@ export const AnalyticsHandler = { } if (this.amplitudeEnabled()) { - eventProperties['user_id'] = user.id + this.amplitudeIdentify(user); Amplitude.getInstance().logEvent('Search', eventProperties); } diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index c2562e519..a3c1c2f23 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -183,6 +183,9 @@ export default { } if (response.headers) { + + // TODO: ADD AMPLITUDE is_control CHECK HERE TO PRESENT RECS OR NOT + const experimentName = response.headers['x-experiment-name']; const personalizeRecipe = response.headers['x-personalize-recipe']; From 57b164a6f0861b8a32f1edacee395d8e397f3cfd Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 11:43:47 -0700 Subject: [PATCH 17/45] add webui app config spec --- src/web-ui/buildspec.yml | 4 +++- src/web-ui/gen_env.sh | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/web-ui/buildspec.yml b/src/web-ui/buildspec.yml index a5d50836b..aaccb32bf 100644 --- a/src/web-ui/buildspec.yml +++ b/src/web-ui/buildspec.yml @@ -5,7 +5,9 @@ env: MPARTICLE_API_KEY: "/retaildemostore/webui/mparticle_api_key" MPARTICLE_SECRET_KEY: "/retaildemostore/webui/mparticle_secret_key" AMPLITUDE_API_KEY: "/retaildemostore/webui/amplitude_api_key" - + AMPLITUDE_SECRET_API_KEY: "/retaildemostore/webui/amplitude_secret_api_key" + AMPLITUDE_RECOMMENDATION_ID: "/retaildemostore/webui/amplitude_recommendation_id" + phases: install: runtime-versions: diff --git a/src/web-ui/gen_env.sh b/src/web-ui/gen_env.sh index 2c86175e8..94d8d8a94 100755 --- a/src/web-ui/gen_env.sh +++ b/src/web-ui/gen_env.sh @@ -44,6 +44,8 @@ printf 'VUE_APP_AMAZON_PAY_STORE_ID=%s\n' "$AMAZON_PAY_STORE_ID" >> .env printf 'VUE_APP_AMAZON_PAY_MERCHANT_ID=%s\n' "$AMAZON_PAY_MERCHANT_ID" >> .env printf 'VUE_APP_AMPLITUDE_API_KEY=%s\n' "$AMPLITUDE_API_KEY" >> .env +printf 'VUE_APP_AMPLITUDE_SECRET_API_KEY=%s\n' "$AMPLITUDE_SECRET_API_KEY" >> .env +printf 'VUE_APP_AMPLITUDE_RECOMMENDATION_ID=%s\n' "$AMPLITUDE_RECOMMENDATION_ID" >> .env printf 'VUE_APP_OPTIMIZELY_SDK_KEY=%s\n' "$OPTIMIZELY_SDK_KEY" >> .env printf 'VUE_APP_SEGMENT_WRITE_KEY=%s\n' "$SEGMENT_WRITE_KEY" >> .env From ad2e9b219b08ad0c27b90fd4cc4fb02618705cbd Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 13:20:17 -0700 Subject: [PATCH 18/45] added amplitude recs api requests --- .../repositories/recommendationsRepository.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/web-ui/src/repositories/recommendationsRepository.js b/src/web-ui/src/repositories/recommendationsRepository.js index 212a20680..3625bebb2 100644 --- a/src/web-ui/src/repositories/recommendationsRepository.js +++ b/src/web-ui/src/repositories/recommendationsRepository.js @@ -14,6 +14,13 @@ const connection = axios.create({ baseURL }) +// Per the Amplitude Recommendation API spec: https://developers.amplitude.com/docs/user-profile-api +// and the Recommendations spec: https://help.amplitude.com/hc/en-us/articles/360059626072-Use-recommendations-in-personalization-campaigns +const amplitudeAPI = axios.create({ + baseURL: 'https://profile-api.amplitude.com', + headers: { 'Authorization': `Api-Key ${process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY}`} +}); + const popular = "/popular" const related = "/related" const recommendations = "/recommendations" @@ -21,6 +28,7 @@ const rerank = "/rerank" const chooseDiscounted = "/choose_discounted" const couponOffer = "/coupon_offer" const experimentOutcome = "/experiment/outcome" +const amplitudeUserProfile = "/v1/userprofile" export default { getPopularProducts(userID, currentItemID, numResults, feature) { @@ -46,6 +54,16 @@ export default { }, // TODO: Add Amplitude Profile API switch here + getAmplitudeRecommendationsForUser(userID) { + let ampID = String(userID).padStart(5, '0'); + console.log(`Getting Amplitude recommendations for userID ${ampID}`); + let params = { + user_id: ampID, + rec_id: process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID + }; + + return amplitudeAPI.get(amplitudeUserProfile, { params: params }); + }, getRecommendationsForUser(userID, currentItemID, numResults, feature) { let params = { From 99cbbab7a009209bbcfccbe5be60e94c1cc8053c Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 15:36:05 -0700 Subject: [PATCH 19/45] added amplitude results to main.vue --- src/web-ui/src/public/Main.vue | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index a3c1c2f23..7f621196d 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -161,17 +161,19 @@ export default { this.userRecommendationsDemoGuideBadgeArticle = null; var response; - if (this.personalizeRecommendationsForVisitor) { + if (this.personalizeRecommendationsForVisitor && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID !== 'NONE' && + process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID ) { + response = await RecommendationsRepository.getAmplitudeRecommendationsForUser(this.personalizeUserID); + } else if (this.personalizeRecommendationsForVisitor) { + this.featureUserRecs = EXPERIMENT_USER_RECS_FEATURE; response = await RecommendationsRepository.getRecommendationsForUser( this.personalizeUserID, '', MAX_RECOMMENDATIONS, - this.featureUserRecs - ); - } - else { + this.featureUserRecs); + } else { this.featureUserRecs = EXPERIMENT_USER_RECS_COLD_FEATURE; response = await RecommendationsRepository.getPopularProducts( @@ -183,9 +185,6 @@ export default { } if (response.headers) { - - // TODO: ADD AMPLITUDE is_control CHECK HERE TO PRESENT RECS OR NOT - const experimentName = response.headers['x-experiment-name']; const personalizeRecipe = response.headers['x-personalize-recipe']; @@ -207,6 +206,23 @@ export default { if (this.userRecommendations.length > 0 && 'experiment' in this.userRecommendations[0]) { AnalyticsHandler.identifyExperiment(this.user, this.userRecommendations[0].experiment); } + } else { // This is an Amplitude response + // Check if this is a control user or actual recommendations + if (response.userData.recommendations[0].is_control) { + this.userRecommendationsTitle = 'Popular products'; + this.featureUserRecs = EXPERIMENT_USER_RECS_COLD_FEATURE; + + response = await RecommendationsRepository.getPopularProducts( + this.personalizeUserID, + '', + MAX_RECOMMENDATIONS, + this.featureUserRecs + ); + this.userRecommendations = response.data; + } else { + this.userRecommendationsTitle = 'Inspired by your shopping trends (Amplitude)' + this.userRecommendations = response.userData.recommendations[0].items; + } } } From 4bb7b3ba99298459586b47524f3f28ab5944a812 Mon Sep 17 00:00:00 2001 From: Igor Date: Wed, 28 Sep 2022 16:53:40 -0700 Subject: [PATCH 20/45] revert to older version --- src/web-ui/src/public/Main.vue | 75 ++++++++++------------------------ 1 file changed, 22 insertions(+), 53 deletions(-) diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index 7f621196d..e9d5fddf2 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -5,11 +5,8 @@

{{ userRecommendationsTitle }} - +

@@ -17,45 +14,33 @@
-
+
- +
- Personalized recommendations do not appear to be enabled for this instance of the storefront yet. Please complete the Personalization workshop labs to add personalized capabilities. + Personalized recommendations do not appear to be enabled for this instance of the storefront yet. Please + complete the Personalization workshop labs to add personalized capabilities. In the meantime, the default user experience will provide product information directly from the catalog.
- +
@@ -161,19 +146,17 @@ export default { this.userRecommendationsDemoGuideBadgeArticle = null; var response; - if (this.personalizeRecommendationsForVisitor && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID !== 'NONE' && - process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID ) { - response = await RecommendationsRepository.getAmplitudeRecommendationsForUser(this.personalizeUserID); - } else if (this.personalizeRecommendationsForVisitor) { - + if (this.personalizeRecommendationsForVisitor) { this.featureUserRecs = EXPERIMENT_USER_RECS_FEATURE; response = await RecommendationsRepository.getRecommendationsForUser( this.personalizeUserID, '', MAX_RECOMMENDATIONS, - this.featureUserRecs); - } else { + this.featureUserRecs + ); + } + else { this.featureUserRecs = EXPERIMENT_USER_RECS_COLD_FEATURE; response = await RecommendationsRepository.getPopularProducts( @@ -185,6 +168,9 @@ export default { } if (response.headers) { + + // TODO: ADD AMPLITUDE is_control CHECK HERE TO PRESENT RECS OR NOT + const experimentName = response.headers['x-experiment-name']; const personalizeRecipe = response.headers['x-personalize-recipe']; @@ -206,23 +192,6 @@ export default { if (this.userRecommendations.length > 0 && 'experiment' in this.userRecommendations[0]) { AnalyticsHandler.identifyExperiment(this.user, this.userRecommendations[0].experiment); } - } else { // This is an Amplitude response - // Check if this is a control user or actual recommendations - if (response.userData.recommendations[0].is_control) { - this.userRecommendationsTitle = 'Popular products'; - this.featureUserRecs = EXPERIMENT_USER_RECS_COLD_FEATURE; - - response = await RecommendationsRepository.getPopularProducts( - this.personalizeUserID, - '', - MAX_RECOMMENDATIONS, - this.featureUserRecs - ); - this.userRecommendations = response.data; - } else { - this.userRecommendationsTitle = 'Inspired by your shopping trends (Amplitude)' - this.userRecommendations = response.userData.recommendations[0].items; - } } } From bebfd4b834be9cd51f2c273927c871e523628c62 Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 29 Sep 2022 09:00:24 -0700 Subject: [PATCH 21/45] log responses to test amp API --- src/web-ui/src/public/Main.vue | 9 +++++++++ src/web-ui/src/repositories/recommendationsRepository.js | 1 - 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index e9d5fddf2..748ef19bf 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -155,6 +155,15 @@ export default { MAX_RECOMMENDATIONS, this.featureUserRecs ); + + let ampRecs = await RecommendationsRepository.getAmplitudeRecommendationsForUser(this.personalizeUserID); + + console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); + console.log('Personalize Results'); + console.log(JSON.stringify(response)); + console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); + console.log('Amplitude Response'); + console.log(JSON.stringify(ampRecs)); } else { this.featureUserRecs = EXPERIMENT_USER_RECS_COLD_FEATURE; diff --git a/src/web-ui/src/repositories/recommendationsRepository.js b/src/web-ui/src/repositories/recommendationsRepository.js index 3625bebb2..4a1166f1c 100644 --- a/src/web-ui/src/repositories/recommendationsRepository.js +++ b/src/web-ui/src/repositories/recommendationsRepository.js @@ -53,7 +53,6 @@ export default { return connection.get(related, { params: params }) }, - // TODO: Add Amplitude Profile API switch here getAmplitudeRecommendationsForUser(userID) { let ampID = String(userID).padStart(5, '0'); console.log(`Getting Amplitude recommendations for userID ${ampID}`); From 1c6baa6d1f9a4bfdfd081e42a947a818b1e85f8d Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 29 Sep 2022 09:23:58 -0700 Subject: [PATCH 22/45] added error checking for amp call --- src/web-ui/src/public/Main.vue | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index 748ef19bf..dae136824 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -138,6 +138,20 @@ export default { this.featuredProducts = featuredProducts.slice(0, MAX_RECOMMENDATIONS).map((product) => ({ product })); } }, + + async getAmplitudeRecs(userId) { + try { + let ampRecs = await RecommendationsRepository.getAmplitudeRecommendationsForUser(this.personalizeUserID); + console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); + console.log('Amplitude Response'); + console.log(JSON.stringify(ampRecs)); + return ampRecs.userData.recommendations[0]; + } catch (e) { + console.log(`ERROR fetching Amplitude results: ${e}`); + return null; + } + }, + async getUserRecommendations() { this.isLoadingRecommendations = true; this.userRecommendationsTitle = null; @@ -156,14 +170,15 @@ export default { this.featureUserRecs ); - let ampRecs = await RecommendationsRepository.getAmplitudeRecommendationsForUser(this.personalizeUserID); - console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); console.log('Personalize Results'); console.log(JSON.stringify(response)); - console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); - console.log('Amplitude Response'); - console.log(JSON.stringify(ampRecs)); + + if (process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY && process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY != 'NONE' && + process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID != 'NONE') { + let ampResults = this.getAmplitudeRecommendationsForUser(this.personalizeUserID); + } + } else { this.featureUserRecs = EXPERIMENT_USER_RECS_COLD_FEATURE; From 5f1ec3f018ff7e49b5d275c217c4056f860fef17 Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 29 Sep 2022 09:33:13 -0700 Subject: [PATCH 23/45] fix compile errors --- src/web-ui/src/public/Main.vue | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index dae136824..a623e252f 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -141,7 +141,7 @@ export default { async getAmplitudeRecs(userId) { try { - let ampRecs = await RecommendationsRepository.getAmplitudeRecommendationsForUser(this.personalizeUserID); + let ampRecs = await RecommendationsRepository.getAmplitudeRecommendationsForUser(userId); console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); console.log('Amplitude Response'); console.log(JSON.stringify(ampRecs)); @@ -176,7 +176,7 @@ export default { if (process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY && process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY != 'NONE' && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID != 'NONE') { - let ampResults = this.getAmplitudeRecommendationsForUser(this.personalizeUserID); + this.getAmplitudeRecommendationsForUser(this.personalizeUserID); } } From 855c10eccba663b984872b064961c04b8327704a Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 29 Sep 2022 09:52:54 -0700 Subject: [PATCH 24/45] fixed function name error --- src/web-ui/src/public/Main.vue | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index a623e252f..7b01f533c 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -139,7 +139,7 @@ export default { } }, - async getAmplitudeRecs(userId) { + async getAmplitudeRecommendations(userId) { try { let ampRecs = await RecommendationsRepository.getAmplitudeRecommendationsForUser(userId); console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); @@ -176,7 +176,7 @@ export default { if (process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY && process.env.VUE_APP_AMPLITUDE_SECRET_API_KEY != 'NONE' && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID && process.env.VUE_APP_AMPLITUDE_RECOMMENDATION_ID != 'NONE') { - this.getAmplitudeRecommendationsForUser(this.personalizeUserID); + this.getAmplitudeRecommendations(this.personalizeUserID); } } From 53d526a5763487e1120a247576b449b22c3dd7b6 Mon Sep 17 00:00:00 2001 From: Igor Date: Sat, 1 Oct 2022 16:34:42 -0700 Subject: [PATCH 25/45] added amplitude env vars --- aws/cloudformation-templates/services/service/service.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aws/cloudformation-templates/services/service/service.yaml b/aws/cloudformation-templates/services/service/service.yaml index bff7c249d..b47f2baf5 100644 --- a/aws/cloudformation-templates/services/service/service.yaml +++ b/aws/cloudformation-templates/services/service/service.yaml @@ -342,6 +342,10 @@ Resources: Secrets: - Name: OPTIMIZELY_SDK_KEY ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-optimizely-sdk-key' + - Name: AMPLITUDE_SECRET_API_KEY + ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore/webui/amplitude_api_key' + - Name: AMPLITUDE_RECOMMENDATION_ID + ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore/webui/amplitude_recommendation_id' - Name: SEGMENT_WRITE_KEY ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-segment-write-key' Environment: From af05f5aba084e51df13f0a6de8647218765bb3c5 Mon Sep 17 00:00:00 2001 From: Igor Date: Sat, 1 Oct 2022 23:51:00 -0700 Subject: [PATCH 26/45] added amp recs experiment --- .../experimentation/experiment_amplitude.py | 56 +++++++++++++++++++ .../experimentation/experiment_manager.py | 9 +++ src/web-ui/src/public/Main.vue | 2 +- 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py new file mode 100644 index 000000000..37fa5a829 --- /dev/null +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -0,0 +1,56 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +import os +from datetime import datetime +from . import experiment, resolvers +import requests +import logging + +amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') +amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') +amplitude_configured = amplitude_rec_id != 'NONE' and amplitude_secret_key != 'NONE' + +log = logging.getLogger(__name__) + +class AmplitudeFeatureTest(experiment.Experiment): + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, filter_values=None, context=None, timestamp: datetime = None): + assert user_id, "`user_id` is required" + + # Call Amplitude + # If user is in control, get DefaultResolver (featrued products) + # else recommend the amplitude ones as this user get the treatment in the experiment + + # All the kwargs that are passed to ResolverFactory.get will be stored as a JSON feature variable. + # algorithm_type = optimizely_sdk.get_feature_variable_string(self.feature, 'algorithm_type', user_id=user_id) + # algorithm_config = optimizely_sdk.get_feature_variable_json(self.feature, 'algorithm_config', user_id=user_id) + # resolver = resolvers.ResolverFactory.get(type=algorithm_type, **algorithm_config) + + # items = resolver.get_items(user_id=user_id, + # product_id=current_item_id, + # product_list=item_list, + # num_results=num_results, + # filter_values=filter_values, + # context=context) + + response = requests.get('https://profile-api.amplitude.com', + headers={'Authorization': f'Api-Key ${amplitude_secret_key}'}) + + log.info(f'${response}') + + feature = 'home_product_recs' # Only for the home page for this workshop + + for rank, item in enumerate(items, 1): + correlation_id = self._create_correlation_id(user_id, variation_key, rank) + item['experiment'] = {'type': 'amplitude', + 'feature': self.feature, + 'name': experiment_key, + 'experiment_key': experiment_key, + 'variationIndex': variation_key, + 'revision_number': config.revision, + 'correlationId': correlation_id} + return items + + def track_conversion(self, correlation_id: str, timestamp: datetime = datetime.now()): + """ Conversion tracking is handled by the Optimizely client library """ + pass \ No newline at end of file diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index bba19bc07..128117091 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -1,6 +1,7 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: MIT-0 +from recommendations.src.recommendations-service.experimentation.experiment_amplitude import AmplitudeFeatureTest import boto3 import logging @@ -10,6 +11,7 @@ from experimentation.experiment_mab import MultiArmedBanditExperiment from experimentation.evidently_feature_resolver import EvidentlyFeatureResolver from experimentation.experiment_optimizely import OptimizelyFeatureTest, optimizely_sdk, optimizely_configured +from experimentation.experiment_amplitude import AmplitudeFeatureTest, amplitude_configured from experimentation.tracking import KinesisTracker log = logging.getLogger(__name__) @@ -40,8 +42,15 @@ def is_configured(self): def is_optimizely_configured(self): return optimizely_configured + def is_amplitude_configured(self): + return amplitude_configured + def get_active(self, feature, user_id): """ Returns the active experiment for the given feature """ + + if self.is_amplitude_configured(): + return AmplitudeFeatureTest() + # 1. If Optimizely is configured for this deployment, check for active Optimizely experiment. if self.is_optimizely_configured(): config = optimizely_sdk.get_optimizely_config() diff --git a/src/web-ui/src/public/Main.vue b/src/web-ui/src/public/Main.vue index 7b01f533c..40ac28471 100644 --- a/src/web-ui/src/public/Main.vue +++ b/src/web-ui/src/public/Main.vue @@ -145,7 +145,7 @@ export default { console.log('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ'); console.log('Amplitude Response'); console.log(JSON.stringify(ampRecs)); - return ampRecs.userData.recommendations[0]; + return ampRecs.data.userData.recommendations[0]; } catch (e) { console.log(`ERROR fetching Amplitude results: ${e}`); return null; From 77fe202b720ae5cbe398990bb8463030a15ddf6c Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 01:38:49 -0700 Subject: [PATCH 27/45] added logging to amp results --- .../experimentation/experiment_amplitude.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 37fa5a829..6c172b879 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -21,34 +21,31 @@ def get_items(self, user_id, current_item_id=None, item_list=None, num_results=1 # If user is in control, get DefaultResolver (featrued products) # else recommend the amplitude ones as this user get the treatment in the experiment - # All the kwargs that are passed to ResolverFactory.get will be stored as a JSON feature variable. - # algorithm_type = optimizely_sdk.get_feature_variable_string(self.feature, 'algorithm_type', user_id=user_id) - # algorithm_config = optimizely_sdk.get_feature_variable_json(self.feature, 'algorithm_config', user_id=user_id) - # resolver = resolvers.ResolverFactory.get(type=algorithm_type, **algorithm_config) - - # items = resolver.get_items(user_id=user_id, - # product_id=current_item_id, - # product_list=item_list, - # num_results=num_results, - # filter_values=filter_values, - # context=context) + algorithm_config = {} + resolver = resolvers.ResolverFactory.get(type=resolvers.ResolverFactory.TYPE_PRODUCT, **algorithm_config) + + items = resolver.get_items(user_id=user_id, + product_id=current_item_id, + product_list=item_list, + num_results=num_results, + filter_values=filter_values, + context=context) + + log.info(f'Resolver Items: ${items}') response = requests.get('https://profile-api.amplitude.com', - headers={'Authorization': f'Api-Key ${amplitude_secret_key}'}) + headers={'Authorization': f'Api-Key ${amplitude_secret_key}'}, + params={'user_id': f'${user_id}', 'rec_id': amplitude_rec_id}) - log.info(f'${response}') + log.info(f'Amplitude Items: ${response}') - feature = 'home_product_recs' # Only for the home page for this workshop + self.feature = 'home_product_recs' # Only for the home page for this workshop for rank, item in enumerate(items, 1): - correlation_id = self._create_correlation_id(user_id, variation_key, rank) + #correlation_id = self._create_correlation_id(user_id, variation_key, rank) item['experiment'] = {'type': 'amplitude', 'feature': self.feature, - 'name': experiment_key, - 'experiment_key': experiment_key, - 'variationIndex': variation_key, - 'revision_number': config.revision, - 'correlationId': correlation_id} + 'name': f'Amplitude ${amplitude_rec_id}'} return items def track_conversion(self, correlation_id: str, timestamp: datetime = datetime.now()): From 771ac6ebb197699f3a052fd8a073467ba61b15cf Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 02:16:08 -0700 Subject: [PATCH 28/45] removed import error --- generators/test_amp_rec.py | 13 +++++++++++++ .../experimentation/experiment_amplitude.py | 12 ++++++------ .../experimentation/experiment_manager.py | 1 - 3 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 generators/test_amp_rec.py diff --git a/generators/test_amp_rec.py b/generators/test_amp_rec.py new file mode 100644 index 000000000..f59db7f28 --- /dev/null +++ b/generators/test_amp_rec.py @@ -0,0 +1,13 @@ +import os +import requests + +amplitude_secret_key = '1795766d2d822cda9f4875654f9a6ee6' +amplitude_rec_id = 'xv8mseo' +user_id = '05087' + +response = requests.get('https://profile-api.amplitude.com/v1/userprofile', + headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, + params={'user_id': user_id, 'rec_id': amplitude_rec_id}) + +print(response.url) +print(response.content) \ No newline at end of file diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 6c172b879..ef411579c 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -31,13 +31,13 @@ def get_items(self, user_id, current_item_id=None, item_list=None, num_results=1 filter_values=filter_values, context=context) - log.info(f'Resolver Items: ${items}') + log.info(f'Resolver Items: {items}') - response = requests.get('https://profile-api.amplitude.com', - headers={'Authorization': f'Api-Key ${amplitude_secret_key}'}, - params={'user_id': f'${user_id}', 'rec_id': amplitude_rec_id}) + response = requests.get('https://profile-api.amplitude.com/v1/userprofile', + headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, + params={'user_id': f'{user_id}', 'rec_id': amplitude_rec_id}) - log.info(f'Amplitude Items: ${response}') + log.info(f'Amplitude Items: {response}') self.feature = 'home_product_recs' # Only for the home page for this workshop @@ -45,7 +45,7 @@ def get_items(self, user_id, current_item_id=None, item_list=None, num_results=1 #correlation_id = self._create_correlation_id(user_id, variation_key, rank) item['experiment'] = {'type': 'amplitude', 'feature': self.feature, - 'name': f'Amplitude ${amplitude_rec_id}'} + 'name': f'Amplitude {amplitude_rec_id}'} return items def track_conversion(self, correlation_id: str, timestamp: datetime = datetime.now()): diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index 128117091..959394d24 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -1,7 +1,6 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: MIT-0 -from recommendations.src.recommendations-service.experimentation.experiment_amplitude import AmplitudeFeatureTest import boto3 import logging From e72190a128683417ef822a2a00205986475acc28 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 10:52:29 -0700 Subject: [PATCH 29/45] add env vars to docker.yml for recs svc --- src/docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/docker-compose.yml b/src/docker-compose.yml index 9d84bc3e4..17fce4fe6 100644 --- a/src/docker-compose.yml +++ b/src/docker-compose.yml @@ -70,6 +70,8 @@ services: - OFFERS_SERVICE_PORT - AWS_XRAY_DAEMON_ADDRESS=xray:2000 - EVIDENTLY_PROJECT_NAME + - AMPLITUDE_SECRET_API_KEY + - AMPLITUDE_RECOMMENDATION_ID volumes: - ~/.aws/:/root/.aws:ro build: From 0a33add47a4465020c008f18d77555fdc7ce7a27 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 11:30:58 -0700 Subject: [PATCH 30/45] log when ampl activated --- .../experimentation/experiment_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index 959394d24..7b3619f95 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -48,6 +48,8 @@ def get_active(self, feature, user_id): """ Returns the active experiment for the given feature """ if self.is_amplitude_configured(): + log.debug('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') + log.debug('Amplitude Configured') return AmplitudeFeatureTest() # 1. If Optimizely is configured for this deployment, check for active Optimizely experiment. From 7becc9e5241ff7a977601920b43bb6725e2adec8 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 12:32:00 -0700 Subject: [PATCH 31/45] logging for amp rec id --- .../experimentation/experiment_amplitude.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index ef411579c..7280bb17c 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -13,6 +13,9 @@ log = logging.getLogger(__name__) +log.info('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') +log.info(f'amp id: {amplitude_rec_id}') + class AmplitudeFeatureTest(experiment.Experiment): def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, filter_values=None, context=None, timestamp: datetime = None): assert user_id, "`user_id` is required" From 62341f9a29a23e6defd598954da66d39462f5380 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 12:49:55 -0700 Subject: [PATCH 32/45] log in main app --- src/recommendations/src/recommendations-service/app.py | 7 +++++++ .../experimentation/experiment_amplitude.py | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/recommendations/src/recommendations-service/app.py b/src/recommendations/src/recommendations-service/app.py index 9739a8166..120737835 100644 --- a/src/recommendations/src/recommendations-service/app.py +++ b/src/recommendations/src/recommendations-service/app.py @@ -1030,4 +1030,11 @@ def experiment_outcome(): app.wsgi_app = LoggingMiddleware(app.wsgi_app) + amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') + amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') + + logging.info('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') + logging.info(f'amp id: {amplitude_rec_id}') + logging.info(f'amp secret: {amplitude_secret_key}') + app.run(debug=True, host='0.0.0.0', port=80) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 7280bb17c..ef411579c 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -13,9 +13,6 @@ log = logging.getLogger(__name__) -log.info('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') -log.info(f'amp id: {amplitude_rec_id}') - class AmplitudeFeatureTest(experiment.Experiment): def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, filter_values=None, context=None, timestamp: datetime = None): assert user_id, "`user_id` is required" From c138d92ef2046e6b4bb6d2e95d895e796c4be6bf Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 12:58:13 -0700 Subject: [PATCH 33/45] logging try #4 --- src/recommendations/src/recommendations-service/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/recommendations/src/recommendations-service/app.py b/src/recommendations/src/recommendations-service/app.py index 120737835..a2d076251 100644 --- a/src/recommendations/src/recommendations-service/app.py +++ b/src/recommendations/src/recommendations-service/app.py @@ -1033,8 +1033,8 @@ def experiment_outcome(): amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') - logging.info('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') - logging.info(f'amp id: {amplitude_rec_id}') - logging.info(f'amp secret: {amplitude_secret_key}') + app.logger.debug('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') + app.logger.debug(f'amp id: {amplitude_rec_id}') + app.logger.debug(f'amp secret: {amplitude_secret_key}') app.run(debug=True, host='0.0.0.0', port=80) From d8c46e05d4b27318797de96716e8369853963dac Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 13:30:36 -0700 Subject: [PATCH 34/45] logging with print --- src/recommendations/src/recommendations-service/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/recommendations/src/recommendations-service/app.py b/src/recommendations/src/recommendations-service/app.py index a2d076251..4fe066003 100644 --- a/src/recommendations/src/recommendations-service/app.py +++ b/src/recommendations/src/recommendations-service/app.py @@ -1033,8 +1033,8 @@ def experiment_outcome(): amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') - app.logger.debug('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') - app.logger.debug(f'amp id: {amplitude_rec_id}') - app.logger.debug(f'amp secret: {amplitude_secret_key}') + print('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') + print(f'amp id: {amplitude_rec_id}') + print(f'amp secret: {amplitude_secret_key}') app.run(debug=True, host='0.0.0.0', port=80) From e0b439644003ac2b0911f532844af2fa768f6b9a Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 13:32:51 -0700 Subject: [PATCH 35/45] remove request log spam --- src/recommendations/src/recommendations-service/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recommendations/src/recommendations-service/app.py b/src/recommendations/src/recommendations-service/app.py index 4fe066003..b4489e7bb 100644 --- a/src/recommendations/src/recommendations-service/app.py +++ b/src/recommendations/src/recommendations-service/app.py @@ -1028,7 +1028,7 @@ def experiment_outcome(): logging.getLogger('experimentation.experiment_manager').addHandler(handler) handler.setLevel(level) # this will get the main app logs to CloudWatch - app.wsgi_app = LoggingMiddleware(app.wsgi_app) + #app.wsgi_app = LoggingMiddleware(app.wsgi_app) amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') From aa0fd33b77920a9cda2d8738d06101cf6121a35d Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 14:05:36 -0700 Subject: [PATCH 36/45] fixed container ssm param bug --- aws/cloudformation-templates/services/service/service.yaml | 2 +- src/recommendations/src/recommendations-service/app.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aws/cloudformation-templates/services/service/service.yaml b/aws/cloudformation-templates/services/service/service.yaml index b47f2baf5..1ce245ce6 100644 --- a/aws/cloudformation-templates/services/service/service.yaml +++ b/aws/cloudformation-templates/services/service/service.yaml @@ -343,7 +343,7 @@ Resources: - Name: OPTIMIZELY_SDK_KEY ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore-optimizely-sdk-key' - Name: AMPLITUDE_SECRET_API_KEY - ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore/webui/amplitude_api_key' + ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore/webui/amplitude_secret_api_key' - Name: AMPLITUDE_RECOMMENDATION_ID ValueFrom: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/retaildemostore/webui/amplitude_recommendation_id' - Name: SEGMENT_WRITE_KEY diff --git a/src/recommendations/src/recommendations-service/app.py b/src/recommendations/src/recommendations-service/app.py index b4489e7bb..add2b71da 100644 --- a/src/recommendations/src/recommendations-service/app.py +++ b/src/recommendations/src/recommendations-service/app.py @@ -1031,7 +1031,7 @@ def experiment_outcome(): #app.wsgi_app = LoggingMiddleware(app.wsgi_app) amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') - amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') + amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_API_KEY', 'NONE') print('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') print(f'amp id: {amplitude_rec_id}') From 5ef5f68b390069570ca011b00ed8adf29198c4ab Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 14:28:51 -0700 Subject: [PATCH 37/45] saved correct settings string --- .../experimentation/experiment_amplitude.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index ef411579c..275151740 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -8,7 +8,7 @@ import logging amplitude_rec_id = os.environ.get('AMPLITUDE_RECOMMENDATION_ID', 'NONE') -amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_KEY', 'NONE') +amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_API_KEY', 'NONE') amplitude_configured = amplitude_rec_id != 'NONE' and amplitude_secret_key != 'NONE' log = logging.getLogger(__name__) From 7314cc609a673c1f1080fe329560c93d7739166f Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 14:46:19 -0700 Subject: [PATCH 38/45] update logging --- .../experimentation/experiment_amplitude.py | 6 ++---- .../experimentation/experiment_manager.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 275151740..7c68db444 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -11,8 +11,6 @@ amplitude_secret_key = os.environ.get('AMPLITUDE_SECRET_API_KEY', 'NONE') amplitude_configured = amplitude_rec_id != 'NONE' and amplitude_secret_key != 'NONE' -log = logging.getLogger(__name__) - class AmplitudeFeatureTest(experiment.Experiment): def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, filter_values=None, context=None, timestamp: datetime = None): assert user_id, "`user_id` is required" @@ -31,13 +29,13 @@ def get_items(self, user_id, current_item_id=None, item_list=None, num_results=1 filter_values=filter_values, context=context) - log.info(f'Resolver Items: {items}') + print(f'******************** Amp Resolver Items: {items}') response = requests.get('https://profile-api.amplitude.com/v1/userprofile', headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, params={'user_id': f'{user_id}', 'rec_id': amplitude_rec_id}) - log.info(f'Amplitude Items: {response}') + print(f'********************* Amplitude Items: {response}') self.feature = 'home_product_recs' # Only for the home page for this workshop diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index 7b3619f95..924dbcebb 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -47,9 +47,9 @@ def is_amplitude_configured(self): def get_active(self, feature, user_id): """ Returns the active experiment for the given feature """ + print(f'************ AMPLITUDE CONFIGURED: {self.is_amplitude_configured()} *******************') + if self.is_amplitude_configured(): - log.debug('ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ') - log.debug('Amplitude Configured') return AmplitudeFeatureTest() # 1. If Optimizely is configured for this deployment, check for active Optimizely experiment. From bb2305e59967c04af7350d4d35968a208083e646 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 15:01:14 -0700 Subject: [PATCH 39/45] check for feature type --- .../experimentation/experiment_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index 924dbcebb..181ee2a52 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -47,9 +47,9 @@ def is_amplitude_configured(self): def get_active(self, feature, user_id): """ Returns the active experiment for the given feature """ - print(f'************ AMPLITUDE CONFIGURED: {self.is_amplitude_configured()} *******************') + log.debug(f'************ AMPLITUDE CONFIGURED: {self.is_amplitude_configured()} *******************') - if self.is_amplitude_configured(): + if self.is_amplitude_configured() and feature == 'home_product_recs': return AmplitudeFeatureTest() # 1. If Optimizely is configured for this deployment, check for active Optimizely experiment. From 5120abd8ead416a3c768b477dd140bd7cb7d3635 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 15:46:29 -0700 Subject: [PATCH 40/45] set test data --- .../experimentation/experiment_manager.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index 181ee2a52..8248f9f80 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -10,7 +10,7 @@ from experimentation.experiment_mab import MultiArmedBanditExperiment from experimentation.evidently_feature_resolver import EvidentlyFeatureResolver from experimentation.experiment_optimizely import OptimizelyFeatureTest, optimizely_sdk, optimizely_configured -from experimentation.experiment_amplitude import AmplitudeFeatureTest, amplitude_configured +from experimentation.experiment_amplitude import AmplitudeFeatureTest, amplitude_configured, amplitude_rec_id from experimentation.tracking import KinesisTracker log = logging.getLogger(__name__) @@ -50,7 +50,13 @@ def get_active(self, feature, user_id): log.debug(f'************ AMPLITUDE CONFIGURED: {self.is_amplitude_configured()} *******************') if self.is_amplitude_configured() and feature == 'home_product_recs': - return AmplitudeFeatureTest() + data = {'id': amplitude_rec_id, + 'feature': feature, + 'name': 'amplitude-home-product-recs', + 'status': 'ACTIVE', + 'type': 'amplitude', + 'variations': []} + return AmplitudeFeatureTest(**data) # 1. If Optimizely is configured for this deployment, check for active Optimizely experiment. if self.is_optimizely_configured(): From b80b6bb65722bbf79be02a7e2c208025f5851419 Mon Sep 17 00:00:00 2001 From: Igor Date: Sun, 2 Oct 2022 21:56:17 -0700 Subject: [PATCH 41/45] added item / experiment to results --- .../experimentation/experiment_amplitude.py | 26 ++++++++++++++++--- .../experimentation/experiment_manager.py | 3 +-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 7c68db444..1da915040 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -35,15 +35,33 @@ def get_items(self, user_id, current_item_id=None, item_list=None, num_results=1 headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, params={'user_id': f'{user_id}', 'rec_id': amplitude_rec_id}) - print(f'********************* Amplitude Items: {response}') + print(f'********************* Amplitude Items: {response.content}') self.feature = 'home_product_recs' # Only for the home page for this workshop for rank, item in enumerate(items, 1): + correlation_id = self._create_correlation_id(user_id, 0, rank) + + item_experiment = { + 'id': self.id, + 'feature': self.feature, + 'name': self.name, + 'type': self.type, + 'variationIndex': 0, + 'resultRank': rank, + 'correlationId': correlation_id + } + + item.update({ + 'experiment': item_experiment + }) + + rank += 1 + #correlation_id = self._create_correlation_id(user_id, variation_key, rank) - item['experiment'] = {'type': 'amplitude', - 'feature': self.feature, - 'name': f'Amplitude {amplitude_rec_id}'} + # item['experiment'] = {'type': 'amplitude', + # 'feature': self.feature, + # 'name': f'Amplitude {amplitude_rec_id}'} return items def track_conversion(self, correlation_id: str, timestamp: datetime = datetime.now()): diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py index 8248f9f80..6850ec8cc 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_manager.py @@ -47,9 +47,8 @@ def is_amplitude_configured(self): def get_active(self, feature, user_id): """ Returns the active experiment for the given feature """ - log.debug(f'************ AMPLITUDE CONFIGURED: {self.is_amplitude_configured()} *******************') - if self.is_amplitude_configured() and feature == 'home_product_recs': + log.debug(f'************ AMPLITUDE CONFIGURED: {self.is_amplitude_configured()} *******************') data = {'id': amplitude_rec_id, 'feature': feature, 'name': 'amplitude-home-product-recs', From d14ac549000d72088ab576c820aa4d76d45ff98a Mon Sep 17 00:00:00 2001 From: Igor Date: Mon, 3 Oct 2022 00:36:28 -0700 Subject: [PATCH 42/45] logic for amp rec items --- generators/test_amp_rec.py | 21 +++++++--- .../experimentation/experiment_amplitude.py | 42 ++++++++++++------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/generators/test_amp_rec.py b/generators/test_amp_rec.py index f59db7f28..f3d39a720 100644 --- a/generators/test_amp_rec.py +++ b/generators/test_amp_rec.py @@ -3,11 +3,20 @@ amplitude_secret_key = '1795766d2d822cda9f4875654f9a6ee6' amplitude_rec_id = 'xv8mseo' -user_id = '05087' +#user_id = 5087 -response = requests.get('https://profile-api.amplitude.com/v1/userprofile', - headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, - params={'user_id': user_id, 'rec_id': amplitude_rec_id}) +for user_id in range(1, 25): + uid = f'{user_id:0>5}' + response = requests.get('https://profile-api.amplitude.com/v1/userprofile', + headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, + params={'user_id': uid, 'rec_id': amplitude_rec_id}) + res = response.json() + items = [] + is_user_in_control_group = True + if res: + for item in res['userData']['recommendations'][0]['items']: + items.append({'itemId': item}) -print(response.url) -print(response.content) \ No newline at end of file + is_user_in_control_group = res['userData']['recommendations'][0]['is_control'] + + print(f'{uid} {is_user_in_control_group}') \ No newline at end of file diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 1da915040..8f2593275 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -12,6 +12,23 @@ amplitude_configured = amplitude_rec_id != 'NONE' and amplitude_secret_key != 'NONE' class AmplitudeFeatureTest(experiment.Experiment): + + def get_amplitude_items(user_id): + uid = f'{user_id:0>5}' + response = requests.get('https://profile-api.amplitude.com/v1/userprofile', + headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, + params={'user_id': uid, 'rec_id': amplitude_rec_id}) + res = response.json() + items = [] + is_user_in_control_group = True + if res: + for item in res['userData']['recommendations'][0]['items']: + items.append({'itemId': item}) + + is_user_in_control_group = res['userData']['recommendations'][0]['is_control'] + + return is_user_in_control_group, items + def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, filter_values=None, context=None, timestamp: datetime = None): assert user_id, "`user_id` is required" @@ -19,23 +36,18 @@ def get_items(self, user_id, current_item_id=None, item_list=None, num_results=1 # If user is in control, get DefaultResolver (featrued products) # else recommend the amplitude ones as this user get the treatment in the experiment - algorithm_config = {} - resolver = resolvers.ResolverFactory.get(type=resolvers.ResolverFactory.TYPE_PRODUCT, **algorithm_config) - - items = resolver.get_items(user_id=user_id, - product_id=current_item_id, - product_list=item_list, - num_results=num_results, - filter_values=filter_values, - context=context) - - print(f'******************** Amp Resolver Items: {items}') + is_control, items = self.get_amplitude_items(user_id) - response = requests.get('https://profile-api.amplitude.com/v1/userprofile', - headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, - params={'user_id': f'{user_id}', 'rec_id': amplitude_rec_id}) + if is_control: # This user is in the control group, show them most popular products + algorithm_config = {} + resolver = resolvers.ResolverFactory.get(type=resolvers.ResolverFactory.TYPE_PRODUCT, **algorithm_config) - print(f'********************* Amplitude Items: {response.content}') + items = resolver.get_items(user_id=user_id, + product_id=current_item_id, + product_list=item_list, + num_results=num_results, + filter_values=filter_values, + context=context) self.feature = 'home_product_recs' # Only for the home page for this workshop From b432386e37e6a213c89aac4e984d27c087fed77b Mon Sep 17 00:00:00 2001 From: Igor Date: Mon, 3 Oct 2022 00:45:12 -0700 Subject: [PATCH 43/45] fixed class error --- .../experimentation/experiment_amplitude.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index 8f2593275..b8ee2fc94 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -13,7 +13,7 @@ class AmplitudeFeatureTest(experiment.Experiment): - def get_amplitude_items(user_id): + def get_amplitude_items(self, user_id): uid = f'{user_id:0>5}' response = requests.get('https://profile-api.amplitude.com/v1/userprofile', headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, From 137a5d5447dfa1731978796d1f5ba2985213f717 Mon Sep 17 00:00:00 2001 From: Igor Date: Mon, 3 Oct 2022 16:30:29 -0700 Subject: [PATCH 44/45] removed comments --- .../experimentation/experiment_amplitude.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py index b8ee2fc94..e42409ad6 100644 --- a/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py +++ b/src/recommendations/src/recommendations-service/experimentation/experiment_amplitude.py @@ -32,10 +32,6 @@ def get_amplitude_items(self, user_id): def get_items(self, user_id, current_item_id=None, item_list=None, num_results=10, tracker=None, filter_values=None, context=None, timestamp: datetime = None): assert user_id, "`user_id` is required" - # Call Amplitude - # If user is in control, get DefaultResolver (featrued products) - # else recommend the amplitude ones as this user get the treatment in the experiment - is_control, items = self.get_amplitude_items(user_id) if is_control: # This user is in the control group, show them most popular products From ad8c7fdb5348f25889521bd3b5102de61b9a1450 Mon Sep 17 00:00:00 2001 From: Igor Date: Fri, 7 Oct 2022 09:52:33 -0700 Subject: [PATCH 45/45] added event send option to send script --- .../generate_amplitude_conversion_events.py | 66 +++++++++++++++++++ generators/send_to_amplitude.py | 22 ++++++- generators/test_amp_rec.py | 22 ------- 3 files changed, 85 insertions(+), 25 deletions(-) create mode 100644 generators/generate_amplitude_conversion_events.py delete mode 100644 generators/test_amp_rec.py diff --git a/generators/generate_amplitude_conversion_events.py b/generators/generate_amplitude_conversion_events.py new file mode 100644 index 000000000..68494e2dd --- /dev/null +++ b/generators/generate_amplitude_conversion_events.py @@ -0,0 +1,66 @@ +import os +import requests +import uuid +import time +import json + +amplitude_secret_key = '1795766d2d822cda9f4875654f9a6ee6' +amplitude_rec_id = 'xv8mseo' + +conversion = .2 + +control_group = [] +experiment_group = [] + +output_file_name = 'src/aws-lambda/personalize-pre-create-resources/data/amplitude/conversion_events.json' + +conversion_event = 'Purchase' + +timestamp_start = time.time() # Start of script timestamp +first_timestamp = timestamp_start - (60*60*24*2) + +def write_events(user_group): + # Generate output file for experiment group + time_increment = (60*60*24*2) / len(experiment_group) + + with open(output_file_name, 'w') as f: + timestamp = first_timestamp + for user in user_group: + amplitude_event = { + "event_type": conversion_event, + "time": int(timestamp * 1000), # Amplitude wants time in ms since the epoch, we have sec + "user_id": user['user_id'], # Amplitude wants a UID as a string with no less than 5 chars + "insert_id": str(uuid.uuid4()), # This is to prevent duplicates when re-running event gen scripts + "event_properties": { + "product_id": user['items'][1], + } + } + + f.write(f'{json.dumps(amplitude_event)}\n') + timestamp += time_increment + +for user_id in range(1000, 2500): + uid = f'{user_id:0>5}' + response = requests.get('https://profile-api.amplitude.com/v1/userprofile', + headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, + params={'user_id': uid, 'rec_id': amplitude_rec_id}) + res = response.json() + items = [] + is_user_in_control_group = True + if res: + for item in res['userData']['recommendations'][0]['items']: + items.append(item) + + is_user_in_control_group = res['userData']['recommendations'][0]['is_control'] + + if is_user_in_control_group: + control_group.append({'user_id': uid, 'items': items}) + else: + experiment_group.append({'user_id': uid, 'items': items}) + + #print(f'{uid} {is_user_in_control_group}') + +write_events(experiment_group) +# split the control list by half +half_length = int(len(control_group) / 2) +write_events(control_group[half_length:]) diff --git a/generators/send_to_amplitude.py b/generators/send_to_amplitude.py index e817739ce..13c2c0b77 100644 --- a/generators/send_to_amplitude.py +++ b/generators/send_to_amplitude.py @@ -3,6 +3,7 @@ import json import requests +import sys # Amplitude event support # This follows the Amplitude V2 HTTP Bulk API spec, here: @@ -12,6 +13,7 @@ # into an Amplitude API compatible represenation. amplitude_interactions_file = 'src/aws-lambda/personalize-pre-create-resources/data/amplitude/interactions.json' +conversion_events_file = 'src/aws-lambda/personalize-pre-create-resources/data/amplitude/conversion_events.json' amplitude_key = 'f6cdf64a6db24778bb9ce82188c19a95' class AmplitudeSender: @@ -39,8 +41,8 @@ def send_batch(self, events, debug=False): print(f'Sent {len(batch_events["events"])} events and got {response}') return response -if __name__ == '__main__': - with open(amplitude_interactions_file, 'r') as events_file: +def send_file(filename): + with open(filename, 'r') as events_file: sender = AmplitudeSender( { 'api_key': amplitude_key }) total_event_count = 0 @@ -60,4 +62,18 @@ def send_batch(self, events, debug=False): if len(events) > 0: sender.send_batch(events) - print(f'sending {len(events)} events') \ No newline at end of file + print(f'sending {len(events)} events') + +def get_args(name='default', first='noop'): + return first + +if __name__ == '__main__': + op = get_args(*sys.argv) + if op == 'noop': + print('SENDING EVENTS FILE') + send_file(amplitude_interactions_file) + elif op == 'conversion': + print('SENDING CONVERSIONS FILE') + send_file(conversion_events_file) + else: + print(f'INVALID OPTION {op} - USAGE: send_to_amplitude.py [conversion - optional]') \ No newline at end of file diff --git a/generators/test_amp_rec.py b/generators/test_amp_rec.py deleted file mode 100644 index f3d39a720..000000000 --- a/generators/test_amp_rec.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -import requests - -amplitude_secret_key = '1795766d2d822cda9f4875654f9a6ee6' -amplitude_rec_id = 'xv8mseo' -#user_id = 5087 - -for user_id in range(1, 25): - uid = f'{user_id:0>5}' - response = requests.get('https://profile-api.amplitude.com/v1/userprofile', - headers={'Authorization': f'Api-Key {amplitude_secret_key}'}, - params={'user_id': uid, 'rec_id': amplitude_rec_id}) - res = response.json() - items = [] - is_user_in_control_group = True - if res: - for item in res['userData']['recommendations'][0]['items']: - items.append({'itemId': item}) - - is_user_in_control_group = res['userData']['recommendations'][0]['is_control'] - - print(f'{uid} {is_user_in_control_group}') \ No newline at end of file