generated from Quang7hong81/cafechungkhoan-vcsc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakov Chain trong MKT đa nền tảng.py
76 lines (62 loc) · 3.3 KB
/
Makov Chain trong MKT đa nền tảng.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import numpy as np
from collections import defaultdict
# ------------- IMPORT DATA -----------------------------
# NOTE(review): the CSV location is a hard-coded local Windows path; the file
# must provide the 'cookie', 'time', 'channel' and 'conversion' columns used below.
df = pd.read_csv(r'C:\HỌC TẬP\PYTHON LEARNING\STATISTIC\DATA RESEARCH\attribution data.csv')

# Order rows by cookie (descending) and, within a cookie, by time (ascending)
# so that the last row of each cookie is its latest interaction.
df = df.sort_values(['cookie', 'time'], ascending=[False, True])
df['visit_order'] = df.groupby('cookie').cumcount() + 1

# Collapse every cookie into the ordered list of distinct channels it touched,
# then attach the conversion flag of its last interaction.
df_paths = df.groupby('cookie')['channel'].aggregate(
    lambda x: x.unique().tolist()).reset_index()
df_last_interaction = df.drop_duplicates('cookie', keep='last')[['cookie', 'conversion']]
df_paths = pd.merge(df_paths, df_last_interaction, how='left', on='cookie')

# Build each path explicitly, row by row: 'Start', the channels, then 'Null'
# when the conversion flag equals 0 and 'Conversion' otherwise.  Adding a
# one-element list to a Series of lists (as np.where operands) depends on
# list/Series broadcasting that is not reliably supported for object data.
df_paths['path'] = [
    ['Start'] + channels + (['Null'] if converted == 0 else ['Conversion'])
    for channels, converted in zip(df_paths['channel'], df_paths['conversion'])
]
df_paths = df_paths[['cookie', 'path']]

# Inputs for the Markov chain: all paths, the number of converting paths and
# the share of paths that end in 'Conversion'.
list_of_paths = df_paths['path']
total_conversions = sum(path.count('Conversion') for path in df_paths['path'].tolist())
base_conversion_rate = total_conversions / len(list_of_paths)
def transition_states(list_of_paths):
    """Count the observed transitions between consecutive states.

    Args:
        list_of_paths: Iterable of paths, each a sequence of state-name
            strings (for example ``['Start', 'Facebook', 'Conversion']``).

    Returns:
        dict: Maps ``'origin>destination'`` to the number of times that step
        occurs across all paths.  Every ordered pair of states seen in the
        paths has an entry (0 when the step never occurs).  Steps whose
        origin is ``'Conversion'`` or ``'Null'`` are not counted.
    """
    absorbing = ('Conversion', 'Null')
    paths = list(list_of_paths)  # materialise once: the paths are walked twice

    # dict.fromkeys de-duplicates while keeping a deterministic first-seen order.
    states = list(dict.fromkeys(state for path in paths for state in path))
    counts = {x + '>' + y: 0 for x in states for y in states}

    # One pass over consecutive pairs.  States are compared by equality, so a
    # state whose name is contained in another one (e.g. 'Search' and
    # 'Paid Search') is never counted twice, and the final element of a path
    # is never used as an origin.
    for path in paths:
        for origin, destination in zip(path, path[1:]):
            if origin not in absorbing:
                counts[origin + '>' + destination] += 1

    return counts
# Count the transitions observed in the module-level list_of_paths.
trans_states = transition_states(list_of_paths)
def transition_prob(trans_dict):
    """Convert transition counts into transition probabilities.

    Args:
        trans_dict: Mapping of ``'origin>destination'`` keys to transition
            counts.

    Returns:
        collections.defaultdict: Maps each key with a positive count and an
        origin other than ``'Conversion'`` / ``'Null'`` to its count divided
        by the total positive count of all transitions leaving that origin.
        Keys with a zero count are omitted.
    """
    absorbing = ('Conversion', 'Null')

    # Total outgoing count per origin.  The origin is taken from the key
    # itself (exact match) rather than by substring search, so states whose
    # names overlap (e.g. 'Search' / 'Paid Search') do not mix their totals.
    outgoing_totals = defaultdict(int)
    for key, count in trans_dict.items():
        if count > 0:
            outgoing_totals[key.split('>')[0]] += count

    # The defaultdict(dict) container type is kept for callers that rely on it.
    trans_prob = defaultdict(dict)
    for key, count in trans_dict.items():
        origin = key.split('>')[0]
        if origin in absorbing or count <= 0:
            continue
        trans_prob[key] = float(count) / float(outgoing_totals[origin])

    return trans_prob
# Turn the transition counts into per-origin transition probabilities.
trans_prob = transition_prob(trans_states)
def transition_matrix(list_of_paths, transition_probabilities):
    """Build the square transition matrix of the Markov chain.

    Args:
        list_of_paths: Iterable of paths, each a sequence of state-name
            strings; the distinct states become the row and column labels.
        transition_probabilities: Mapping of ``'origin>destination'`` keys to
            probabilities.

    Returns:
        pandas.DataFrame: Rows are origins, columns are destinations.  Cells
        default to 0.0, ``'Conversion'`` and ``'Null'`` get a 1.0 self-loop,
        and every entry of ``transition_probabilities`` is written to its
        ``[origin, destination]`` cell.
    """
    absorbing = ('Conversion', 'Null')

    # First-seen order gives the same, deterministic labelling for rows and columns.
    channels = list(dict.fromkeys(x for path in list_of_paths for x in path))

    # Allocate the full zero matrix up front instead of growing an empty frame
    # one row/column at a time.
    trans_matrix = pd.DataFrame(0.0, index=channels, columns=channels)

    # Use a single .at access: chained ``.loc[row][col] = value`` assigns to a
    # temporary object and does not update the frame under Copy-on-Write.
    for channel in channels:
        if channel in absorbing:
            trans_matrix.at[channel, channel] = 1.0

    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value

    return trans_matrix
# Assemble the transition matrix from the paths and their transition probabilities.
trans_matrix = transition_matrix(list_of_paths, trans_prob)