"""Summarise ad clicks by traffic source, experimental group and day.

This is the post-patch content of ``aggregates/a-b project/code.py``
(index 872fb68..341eb04), reconstructed from a unified diff.

The script reads ``ad_clicks.csv`` and uses these columns: ``user_id``,
``utm_source``, ``ad_click_timestamp``, ``experimental_group`` and ``day``.
A row counts as a click when ``ad_click_timestamp`` is not null.
"""

import pandas as pd


def count_users(frame, keys):
    """Return the count of non-null ``user_id`` values per group of *keys*.

    Args:
        frame: DataFrame holding a ``user_id`` column and the *keys* column(s).
        keys: A column name, or list of column names, to group by.

    Returns:
        A DataFrame with the key column(s) and a ``user_id`` count column.
    """
    return frame.groupby(keys).user_id.count().reset_index()


def pivot_clicks(frame, by):
    """Return one row per *by* value with ``False`` and ``True`` count columns.

    Args:
        frame: DataFrame holding ``user_id``, ``is_click`` and the *by* column.
        by: Name of the column whose values become the rows.

    Returns:
        A DataFrame with columns ``[by, False, True]``; each cell is the
        number of non-null ``user_id`` values for that row and click flag.
    """
    counts = frame.groupby([by, 'is_click']).user_id.count()
    # fill_value=0 keeps a missing (row, flag) combination from becoming NaN.
    wide = counts.unstack('is_click', fill_value=0)
    # Guarantee both flag columns exist even if the data holds only one flag,
    # so later lookups of True / False cannot raise KeyError.
    both_flags = pd.Index([False, True], name='is_click')
    wide = wide.reindex(columns=both_flags, fill_value=0)
    return wide.reset_index()


def with_percent_clicked(pivot):
    """Return a copy of *pivot* with a ``percent_clicked`` column added.

    Args:
        pivot: DataFrame with ``True`` and ``False`` count columns, as
            produced by ``pivot_clicks``.

    Returns:
        A new DataFrame; ``percent_clicked`` is clicks / (clicks + non-clicks)
        expressed on a 0-100 scale.
    """
    clicked = pivot[True]
    total = clicked + pivot[False]
    return pivot.assign(percent_clicked=clicked / total * 100)


def main():
    """Load ``ad_clicks.csv`` and print each summary table in turn."""
    ad_clicks = pd.read_csv('ad_clicks.csv')

    # Views per traffic source.
    print(count_users(ad_clicks, 'utm_source'))

    # A null timestamp means the ad was shown but not clicked.
    ad_clicks['is_click'] = ~ad_clicks.ad_click_timestamp.isnull()
    print(ad_clicks.head(10))

    # Click / non-click counts per source, long form and then wide form.
    print(count_users(ad_clicks, ['utm_source', 'is_click']))
    print(with_percent_clicked(pivot_clicks(ad_clicks, 'utm_source')))

    # Size of each experimental group, then its click / non-click split.
    print(count_users(ad_clicks, 'experimental_group'))
    print(pivot_clicks(ad_clicks, 'experimental_group'))

    # Per-day click rate, separately for group A and group B.
    for group in ('A', 'B'):
        members = ad_clicks[ad_clicks.experimental_group == group]
        print(with_percent_clicked(pivot_clicks(members, 'day')))


if __name__ == '__main__':
    main()