Start with some imports.
Scipy for some statistics, numpy for random number generation and pandas for the data frames
import scipy.stats as stats
import numpy as np
import pandas as pd
The ClicksGame class implements all the state for the game
class ClicksGame:
# alpha and beta determine the distribution from which
# new click through rates are drawn.
# alpha=5, beta=100 will give a very narrow distribution
# with little variance in click through rates.
# alpha=2, beta=20 is a little fatter
alpha = 2
beta = 20
# weeklyimpressions is the number of trials per week in total
# The trials are distributed between the ads according to the
# rotation method
weeklyimpressions = 1000
# The starting state - no ads.
state = {}
def __init__(self, rotation='even', epsilon=0.1):
if rotation == 'even':
self.rotation = self.evenAllocation
elif rotation == 'optimise':
self.rotation = self.epsilonGreedy
self.epsilon = epsilon
self.state = {'active_ads': {},
'paused_ads': {},
'n_ads': 0,
'turns': 0,
}
def take_turn(self):
self.rotation()
def n_ads(self):
return(self.state['n_ads'])
def n_active_ads(self):
return(len(self.state['active_ads']))
def addAdvert(self):
# Adds an advert.
# First randomly generate the click through rate
ctr = np.random.beta(self.alpha,self.beta,1)[0]
ad = {'impressions':0,
'clicks':0,
'trueCtr': ctr
}
n_ads = self.state['n_ads']
self.state['active_ads'][n_ads+1] = ad
self.state['n_ads'] += 1
def pauseAdvert(self,i):
# Pause advert with id i.
if i in self.state['active_ads']:
ad = self.state['active_ads'].pop(i)
self.state['paused_ads'][i] = ad
def total_clicks(self):
clicks = 0
for i in self.state['active_ads']:
clicks += self.state['active_ads'][i]['clicks']
for i in self.state['paused_ads']:
clicks += self.state['paused_ads'][i]['clicks']
return(clicks)
def evenAllocation(self):
# Randomly allocate impressions between all active adverts
self.state['turns'] += 1
if self.state['active_ads']:
n_active = len(self.state['active_ads'])
impressions = np.random.multinomial(self.weeklyimpressions,
[1/n_active]*n_active,
size=1
)
impressions = np.ndarray.tolist(impressions)[0]
for adix in self.state['active_ads']:
ad = self.state['active_ads'][adix]
imps = impressions.pop()
clicks = np.random.binomial(imps, ad['trueCtr'], 1)[0]
self.state['active_ads'][adix]['impressions'] += imps
self.state['active_ads'][adix]['clicks'] += clicks
def epsilonGreedy(self):
# Epsilon greedy bandit allocation
# Allocated self.epsilon percent of impressions randomly
# The remaining 1-self.epsilon percent are given to the ad
# with the best observed ctr.
#
# N.B. This allocation method takes a lot longer to run than
# even allocation. This will always be the case I think but this
# isn't to say that the below code can't be made much quicker.
self.state['turns'] += 1
if self.state['active_ads']:
for i in range(self.weeklyimpressions):
if np.random.uniform(0,1,1)[0] < self.epsilon:
#explore
adix = np.random.choice(list(self.state['active_ads'].keys()),1)[0]
self.state['active_ads'][adix]['impressions'] += 1
click = np.random.uniform(0,1,1)[0] < self.state['active_ads'][adix]['trueCtr']
self.state['active_ads'][adix]['clicks'] += click
else:
#exploit
s = self.state
ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions']
if s['active_ads'][x]['impressions'] is not 0 else 0
for x in s['active_ads']]
maxctr = max(ctrs)
for adix in s['active_ads']:
if s['active_ads'][adix]['impressions'] == 0 or s['active_ads'][adix]['clicks']/s['active_ads'][adix]['impressions'] < maxctr:
pass
else:
self.state['active_ads'][adix]['impressions'] += 1
click = np.random.uniform(0,1,1)[0] < self.state['active_ads'][adix]['trueCtr']
self.state['active_ads'][adix]['clicks'] += click
Quick demonstration game where we add two adverts and then run a week:
c = ClicksGame(rotation='optimise')
c.addAdvert()
c.addAdvert()
c.take_turn()
c.state
Now start to define some different ad test strategies.
The simplest is to add two adverts and then do nothing.
def doNothing(c):
while c.n_active_ads() < 2:
c.addAdvert()
c.take_turn()
c = ClicksGame()
doNothing(c)
doNothing(c)
c.state
A strategy which is impossible in real life but which is possible in the game is to cheat by looking at the true click through rate and picking the advert that is best.
It is useful to have this strategy as a comparison; any other strategy that does nearly as well as cheating must be pretty good.
def cheat(c):
while c.n_active_ads() < 2:
c.addAdvert()
c.take_turn()
s = c.state
ctrs = [s['active_ads'][x]['trueCtr'] for x in s['active_ads']]
maxctr = max(ctrs)
to_pause = []
for i in s['active_ads']:
if s['active_ads'][i]['trueCtr'] < maxctr:
to_pause.append(i)
for i in to_pause:
c.pauseAdvert(i)
c = ClicksGame()
while c.state['turns'] < 5:
cheat(c)
c.state
Another strawman strategy is to pick adverts to pause at random.
def random(c):
while c.n_active_ads() < 2:
c.addAdvert()
c.take_turn()
s = c.state
ads = list(s['active_ads'].keys())
ad_to_pause = np.random.choice(ads,1)[0]
c.pauseAdvert(ad_to_pause)
s = ClicksGame()
while s.state['turns'] < 5:
random(s)
s.state
A more common strategy is to do nothing until a chi-squared test has a p-value below a certain threshold.
The pick the ad with the best observed ctr.
def chi2(pvalue,c):
while c.n_active_ads() < 2:
c.addAdvert()
c.take_turn()
clicks = c.total_clicks()
if clicks > 0:
obs = []
for i in c.state['active_ads']:
ad = c.state['active_ads'][i]
obs.append([ad['clicks'], ad['impressions']-ad['clicks']])
p = stats.chi2_contingency(obs)[1]
if p < pvalue:
s = c.state
ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] for x in s['active_ads']]
maxctr = max(ctrs)
to_pause = []
for i in s['active_ads']:
ad = s['active_ads'][i]
obs_ctr = ad['clicks']/ad['impressions']
if obs_ctr < maxctr:
to_pause.append(i)
for i in to_pause:
c.pauseAdvert(i)
s = ClicksGame()
while s.state['turns'] < 10:
chi2(0.1,s)
s.state
The G Test is very similar to chi-squared. Some people consider it more appropriate for this kind of problem
def g_test(pvalue,c):
while c.n_active_ads() < 2:
c.addAdvert()
c.take_turn()
clicks = c.total_clicks()
if clicks > 0:
obs = []
for i in c.state['active_ads']:
ad = c.state['active_ads'][i]
obs.append([ad['clicks'], ad['impressions']-ad['clicks']])
p = stats.chi2_contingency(obs,lambda_="log-likelihood")[1]
if p < pvalue:
s = c.state
ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] for x in s['active_ads']]
maxctr = max(ctrs)
to_pause = []
for i in s['active_ads']:
ad = s['active_ads'][i]
obs_ctr = ad['clicks']/ad['impressions']
if obs_ctr < maxctr:
to_pause.append(i)
for i in to_pause:
c.pauseAdvert(i)
An easy thing to do is to just pick the best advert and pause all the others.
def pick_best(n_adverts,c):
while c.n_active_ads() < n_adverts:
c.addAdvert()
c.take_turn()
s = c.state
ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] if s['active_ads'][x]['impressions'] is not 0 else 0 for x in s['active_ads']]
maxctr = max(ctrs)
to_pause = []
for i in s['active_ads']:
ad = s['active_ads'][i]
obs_ctr = ad['clicks']/ad['impressions'] if ad['impressions'] is not 0 else 0
if obs_ctr < maxctr:
to_pause.append(i)
for i in to_pause:
c.pauseAdvert(i)
The tqdm package provides nice progress bars.
from tqdm import tnrange,tqdm_notebook
playGame
runs a strategy for a specified number of turns using even ad rotation
def playGame(strategy,turns):
s = ClicksGame()
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())
games = [
('nothing',doNothing),
('cheat', cheat),
('random', random),
('chi2_0.01', lambda s: chi2(0.01,s)),
('chi2_0.05', lambda s: chi2(0.05,s)),
('chi2_0.10', lambda s: chi2(0.1,s)),
('chi2_0.20', lambda s: chi2(0.2,s)),
('gtest_0.01', lambda s: g_test(0.01,s)),
('gtest_0.05', lambda s: g_test(0.05,s)),
('gtest_0.10', lambda s: g_test(0.1,s)),
('gtest_0.20', lambda s: g_test(0.2,s)),
('pick_best', lambda s: pick_best(2,s))
]
Play all games
import os
if os.path.isfile("rotate1.csv"):
r = pd.read_csv("rotate1.csv",index_col=0)
r["row"] = r.index % 1000
results = r.pivot(index="row",columns="variable",values="value")
else:
results = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playGame(game,52)
res.append(c)
results[label] = res
results = pd.DataFrame(results)
x = pd.DataFrame(pd.DataFrame.mean(results))
x.columns = ['Clicks']
x.index.name = "Strategy"
x.reset_index(inplace=True)
x
Cheat
performs the best (obviously). Out of all the other strategies, the general trend is that higher p-values perform better and best of all is to just pick the ad with the highest observed ctr.
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.melt(results)
if not os.path.isfile("rotate1.csv"):
df.to_csv("rotate1.csv")
plt.figure(figsize=(24,16))
p = sns.boxplot(x="Strategy", y="value", data=df)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
Does using the epsilon-greedy algorithm to optimise ad rotation make a difference?
(This code takes a while to run - nearly 3 hours on my machine).
def playEGGame(strategy,turns):
s = ClicksGame(rotation='optimise')
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())
if os.path.isfile("bandit1.csv"):
r = pd.read_csv("bandit1.csv",index_col=0)
r["row"] = r.index % 1000
bandit_results = r.pivot(index="row",columns="variable",values="value")
else:
bandit_results = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playEGGame(game,52)
res.append(c)
bandit_results[label] = res
bandit_results = pd.DataFrame(bandit_results)
bandit_df = pd.melt(bandit_results)
if not os.path.isfile("bandit1.csv"):
bandit_df.to_csv("bandit1.csv")
plt.figure(figsize=(15,15))
p = sns.boxplot(x="Strategy", y="value", data=df)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
b = pd.DataFrame(pd.DataFrame.mean(bandit_results))
b.columns = ['Bandit']
b.index.name = "Strategy"
b.reset_index(inplace=True)
e = pd.DataFrame(pd.DataFrame.mean(results))
e.columns = ['Even']
e.index.name = "Strategy"
e.reset_index(inplace=True)
merged = pd.merge(b,e,on="Strategy")
merged['Difference'] = merged['Bandit'] - merged['Even']
merged['Percentage'] = merged['Bandit']/merged['Even'] - 1
merged
df['Rotation'] = 'Even'
bandit_df['Rotation'] = 'Optimise'
bandit_df.rename(columns={"variable":"Strategy"}, inplace=True)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy", y="value", hue="Rotation", data=pd.concat([df, bandit_df],ignore_index=True))
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
n_ads = [("02", lambda s: pick_best(2, s)),
("03", lambda s: pick_best(3, s)),
("04", lambda s: pick_best(4, s)),
("05", lambda s: pick_best(5, s)),
("06", lambda s: pick_best(6, s)),
("07", lambda s: pick_best(7, s)),
("08", lambda s: pick_best(8, s)),
("09", lambda s: pick_best(9, s)),
("10", lambda s: pick_best(10, s))
]
n_ads.append(("cheat",cheat))
if os.path.isfile("nads1.csv"):
r = pd.read_csv("nads1.csv",index_col=0)
r["row"] = r.index % 1000
n_ads_results = r.pivot(index="row",columns="variable",values="value")
else:
n_ads_results = {}
for (label,game) in tqdm_notebook(n_ads,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playGame(game,52)
res.append(c)
n_ads_results[label] = res
n_ads_results = pd.DataFrame(n_ads_results)
x = pd.DataFrame(pd.DataFrame.mean(n_ads_results))
x.columns = ['Clicks']
x.index.name = "Strategy/No. of ads"
x.reset_index(inplace=True)
x
n_ads_df = pd.melt(n_ads_results)
if not os.path.isfile("nads1.csv"):
n_ads_df.to_csv("nads1.csv")
plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy/No. of ads", y="value", data=n_ads_df)
p.set(xlabel="Number of ads", ylabel="Clicks")
sns.plt.show()
if os.path.isfile("nads_bandit1.csv"):
r = pd.read_csv("nads_bandit1.csv",index_col=0)
r["row"] = r.index % 1000
n_ads_bandit_results = r.pivot(index="row",columns="variable",values="value")
else:
n_ads_bandit_results = {}
for (label,game) in tqdm_notebook(n_ads,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playEGGame(game,52)
res.append(c)
n_ads_bandit_results[label] = res
n_ads_bandit_results = pd.DataFrame(n_ads_bandit_results)
x = pd.DataFrame(pd.DataFrame.mean(n_ads_bandit_results))
x.columns = ['Clicks']
x.index.name = "Strategy/No. of ads"
x.reset_index(inplace=True)
x
With a bandit, adding more ads is very helpful. Throw shit against the wall and let the algorithm sort it out
n_ads_bandit_df = pd.melt(pd.DataFrame(n_ads_bandit_results))
if not os.path.isfile("nads_bandit1.csv"):
n_ads_bandit_df.to_csv("nads_bandit1.csv")
plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy/No. of ads", y="value", data=n_ads_bandit_df)
p.set(xlabel="Number of Adverts", ylabel="Clicks")
sns.plt.show()
def play100ImpressionsGame(strategy,turns):
s = ClicksGame()
s.weeklyimpressions = 100
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())
results_100Impressions = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = play100ImpressionsGame(game,52)
res.append(c)
results_100Impressions[label] = res
results_100Impressions = pd.DataFrame(results_100Impressions)
df_100impressions = pd.melt(results_100Impressions)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_100impressions)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
def play50ImpressionsGame(strategy,turns):
s = ClicksGame()
s.weeklyimpressions = 50
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())
results_50Impressions = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = play50ImpressionsGame(game,52)
res.append(c)
results_50Impressions[label] = res
results_50Impressions = pd.DataFrame(results_50Impressions)
df_50impressions = pd.melt(results_50Impressions)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_50impressions)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
def playEndResultGame(strategy,turns):
s = ClicksGame()
while s.state['turns'] < turns:
strategy(s)
ads = s.state['active_ads']
best = max([ads[i]['trueCtr'] for i in ads])
return(best,s.state["n_ads"])
end_results = {}
end_results_n_ads = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
n_ads = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c,n = playEndResultGame(game,10)
res.append(c)
n_ads.append(n)
end_results[label] = res
end_results_n_ads[label] = n_ads
end_results = pd.DataFrame(end_results)
df_end_results = pd.melt(end_results)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_end_results)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()
pd.DataFrame.mean(pd.DataFrame(end_results_n_ads))
def playEndResult100ImpressionsGame(strategy,turns):
s = ClicksGame()
s.weeklyimpressions = 100
while s.state['turns'] < turns:
strategy(s)
ads = s.state['active_ads']
best = max([ads[i]['trueCtr'] for i in ads])
return(best)
end_results_100 = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playEndResult100ImpressionsGame(game,10)
res.append(c)
end_results_100[label] = res
end_results_100 = pd.DataFrame(end_results_100)
df_end_results_100 = pd.melt(end_results_100)
plt.figure(figsize=(15,15))
p = sns.boxplot(x="variable", y="value", data=df_end_results_100)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()
def playRestrictedVariationsGame(strategy,ads):
s = ClicksGame()
while s.state['n_ads'] < ads:
strategy(s)
ads = s.state['active_ads']
best = max([ads[i]['trueCtr'] for i in ads])
return(best)
restricted_variations = {}
for (label,game) in tqdm_notebook(games[1:],desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playRestrictedVariationsGame(game,5)
res.append(c)
restricted_variations[label] = res
restricted_variations = pd.DataFrame(restricted_variations)
restricted_variations_df = pd.melt(restricted_variations)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=restricted_variations_df)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()
def playRestrictedVariationsGame50(strategy,ads):
s = ClicksGame()
s.weeklyimpressions = 100
while s.state['n_ads'] < ads and s.state['turns'] < 50:
strategy(s)
ads = s.state['active_ads']
best = max([ads[i]['trueCtr'] for i in ads])
return(best)
restricted_variations_50 = {}
for (label,game) in tqdm_notebook(games[1:],desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playRestrictedVariationsGame(game,5)
res.append(c)
restricted_variations_50[label] = res
restricted_variations_50 = pd.DataFrame(restricted_variations_50)
restricted_variations_50_df = pd.melt(restricted_variations)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=restricted_variations_50_df)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()