Scipy for some statistics, numpy for random number generation and pandas for the data frames

In [4]:
import scipy.stats as stats
import numpy as np
import pandas as pd


The ClicksGame class implements all the state for the game

In [5]:
class ClicksGame:
# alpha and beta determine the distribution from which
# new click through rates are drawn.
# alpha=5, beta=100 will give a very narrow distribution
# with little variance in click through rates.
# alpha=2, beta=20 is a little fatter
alpha = 2
beta = 20

# weeklyimpressions is the number of trials per week in total
# The trials are distributed between the ads according to the
# rotation method
weeklyimpressions = 1000

# The starting state - no ads.
state = {}

def __init__(self, rotation='even', epsilon=0.1):
if rotation == 'even':
self.rotation = self.evenAllocation
elif rotation == 'optimise':
self.rotation = self.epsilonGreedy
self.epsilon = epsilon
'turns': 0,
}
def take_turn(self):
self.rotation()

# First randomly generate the click through rate
ctr = np.random.beta(self.alpha,self.beta,1)[0]
'clicks':0,
'trueCtr': ctr
}

# Pause advert with id i.

def total_clicks(self):
clicks = 0
return(clicks)

def evenAllocation(self):
# Randomly allocate impressions between all active adverts
self.state['turns'] += 1
impressions = np.random.multinomial(self.weeklyimpressions,
[1/n_active]*n_active,
size=1
)
impressions = np.ndarray.tolist(impressions)[0]
imps = impressions.pop()

def epsilonGreedy(self):
# Epsilon greedy bandit allocation
# Allocated self.epsilon percent of impressions randomly
# The remaining 1-self.epsilon percent are given to the ad
# with the best observed ctr.
#
# N.B. This allocation method takes a lot longer to run than
# even allocation. This will always be the case I think but this
# isn't to say that the below code can't be made much quicker.
self.state['turns'] += 1
for i in range(self.weeklyimpressions):
if np.random.uniform(0,1,1)[0] < self.epsilon:
#explore
else:
#exploit
s = self.state
if s['active_ads'][x]['impressions'] is not 0 else 0
maxctr = max(ctrs)
pass
else:



Quick demonstration game where we add two adverts and then run a week:

In [6]:
c = ClicksGame(rotation='optimise')
c.take_turn()
c.state

Out[6]:
{'active_ads': {1: {'clicks': 88,
'impressions': 779,
'trueCtr': 0.10795954112418148},
2: {'clicks': 15, 'impressions': 214, 'trueCtr': 0.088679615145492843}},
'turns': 1}

Now start to define some different ad test strategies.

In [7]:
def doNothing(c):
c.take_turn()

In [8]:
c = ClicksGame()
doNothing(c)
doNothing(c)
c.state

Out[8]:
{'active_ads': {1: {'clicks': 82,
'impressions': 1011,
'trueCtr': 0.083759471000148675},
2: {'clicks': 82, 'impressions': 989, 'trueCtr': 0.072707733964301746}},
'turns': 2}

A strategy which is impossible in real life but which is possible in the game is to cheat by looking at the true click through rate and picking the advert that is best.

It is useful to have this strategy as a comparison; any other strategy that does nearly as well as cheating must be pretty good.

In [9]:
def cheat(c):
c.take_turn()
s = c.state
maxctr = max(ctrs)
to_pause = []
to_pause.append(i)
for i in to_pause:

In [10]:
c = ClicksGame()
while c.state['turns'] < 5:
cheat(c)

c.state

Out[10]:
{'active_ads': {5: {'clicks': 204,
'impressions': 1004,
'trueCtr': 0.18339487222248058}},
'impressions': 504,
'trueCtr': 0.042531824031978249},
2: {'clicks': 118, 'impressions': 1484, 'trueCtr': 0.07558811885068413},
3: {'clicks': 23, 'impressions': 519, 'trueCtr': 0.052936233078316852},
4: {'clicks': 102, 'impressions': 982, 'trueCtr': 0.080658421522488699},
6: {'clicks': 40, 'impressions': 507, 'trueCtr': 0.092294268564702811}},
'turns': 5}

Another strawman strategy is to pick adverts to pause at random.

In [11]:
def random(c):
c.take_turn()
s = c.state

In [12]:
s = ClicksGame()
while s.state['turns'] < 5:
random(s)

s.state

Out[12]:
{'active_ads': {6: {'clicks': 111,
'impressions': 501,
'trueCtr': 0.1914446524341035}},
'impressions': 2439,
'trueCtr': 0.065348580171771239},
2: {'clicks': 22, 'impressions': 497, 'trueCtr': 0.045655369615622714},
3: {'clicks': 59, 'impressions': 526, 'trueCtr': 0.099652566043553659},
4: {'clicks': 36, 'impressions': 528, 'trueCtr': 0.069917814742279122},
5: {'clicks': 177, 'impressions': 509, 'trueCtr': 0.33433599714983936}},
'turns': 5}

A more common strategy is to do nothing until a chi-squared test has a p-value below a certain threshold.

The pick the ad with the best observed ctr.

In [13]:
def chi2(pvalue,c):
c.take_turn()
clicks = c.total_clicks()
if clicks > 0:
obs = []
p = stats.chi2_contingency(obs)[1]
if p < pvalue:
s = c.state
maxctr = max(ctrs)
to_pause = []
if obs_ctr < maxctr:
to_pause.append(i)
for i in to_pause:


In [14]:
s = ClicksGame()
while s.state['turns'] < 10:
chi2(0.1,s)

s.state

Out[14]:
{'active_ads': {2: {'clicks': 824,
'impressions': 5006,
'trueCtr': 0.16606703563697534},
5: {'clicks': 590, 'impressions': 3490, 'trueCtr': 0.16131558839301935}},
'impressions': 507,
'trueCtr': 0.037307833755827212},
3: {'clicks': 19, 'impressions': 520, 'trueCtr': 0.029859760520460937},
4: {'clicks': 37, 'impressions': 477, 'trueCtr': 0.080969023930524034}},
'turns': 10}

The G Test is very similar to chi-squared. Some people consider it more appropriate for this kind of problem

In [15]:
def g_test(pvalue,c):
c.take_turn()
clicks = c.total_clicks()
if clicks > 0:
obs = []
p = stats.chi2_contingency(obs,lambda_="log-likelihood")[1]
if p < pvalue:
s = c.state
maxctr = max(ctrs)
to_pause = []
if obs_ctr < maxctr:
to_pause.append(i)
for i in to_pause:


An easy thing to do is to just pick the best advert and pause all the others.

In [16]:
def pick_best(n_adverts,c):
c.take_turn()
s = c.state
maxctr = max(ctrs)
to_pause = []
if obs_ctr < maxctr:
to_pause.append(i)
for i in to_pause:



The tqdm package provides nice progress bars.

In [17]:
from tqdm import tnrange,tqdm_notebook


playGame runs a strategy for a specified number of turns using even ad rotation

In [18]:
def playGame(strategy,turns):
s = ClicksGame()
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())

In [19]:
games = [
('nothing',doNothing),
('cheat', cheat),
('random', random),
('chi2_0.01', lambda s: chi2(0.01,s)),
('chi2_0.05', lambda s: chi2(0.05,s)),
('chi2_0.10', lambda s: chi2(0.1,s)),
('chi2_0.20', lambda s: chi2(0.2,s)),
('gtest_0.01', lambda s: g_test(0.01,s)),
('gtest_0.05', lambda s: g_test(0.05,s)),
('gtest_0.10', lambda s: g_test(0.1,s)),
('gtest_0.20', lambda s: g_test(0.2,s)),
('pick_best', lambda s: pick_best(2,s))
]


Play all games

In [20]:
import os

if os.path.isfile("rotate1.csv"):
r["row"] = r.index % 1000
results = r.pivot(index="row",columns="variable",values="value")
else:
results = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playGame(game,52)
res.append(c)
results[label] = res
results = pd.DataFrame(results)

In [21]:
x = pd.DataFrame(pd.DataFrame.mean(results))
x.columns = ['Clicks']
x.index.name = "Strategy"
x.reset_index(inplace=True)

x

Out[21]:
Strategy Clicks
0 cheat 8422.771
1 chi2_0.01 7699.615
2 chi2_0.05 7896.281
3 chi2_0.10 8049.524
4 chi2_0.20 8199.191
5 gtest_0.01 7655.258
6 gtest_0.05 7950.070
7 gtest_0.10 7967.160
8 gtest_0.20 8131.526
9 nothing 4738.289
10 pick_best 8287.084
11 random 4723.216

Cheat performs the best (obviously). Out of all the other strategies, the general trend is that higher p-values perform better and best of all is to just pick the ad with the highest observed ctr.

In [22]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [47]:
df = pd.melt(results)
if not os.path.isfile("rotate1.csv"):
df.to_csv("rotate1.csv")
plt.figure(figsize=(24,16))
p = sns.boxplot(x="Strategy", y="value", data=df)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()


Does using the epsilon-greedy algorithm to optimise ad rotation make a difference?

(This code takes a while to run - nearly 3 hours on my machine).

In [24]:
def playEGGame(strategy,turns):
s = ClicksGame(rotation='optimise')
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())

if os.path.isfile("bandit1.csv"):
r["row"] = r.index % 1000
bandit_results = r.pivot(index="row",columns="variable",values="value")
else:
bandit_results = {}
for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playEGGame(game,52)
res.append(c)
bandit_results[label] = res
bandit_results = pd.DataFrame(bandit_results)

In [25]:
bandit_df = pd.melt(bandit_results)
if not os.path.isfile("bandit1.csv"):
bandit_df.to_csv("bandit1.csv")
plt.figure(figsize=(15,15))
p = sns.boxplot(x="Strategy", y="value", data=df)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()

In [ ]:


In [26]:
b = pd.DataFrame(pd.DataFrame.mean(bandit_results))
b.columns = ['Bandit']
b.index.name = "Strategy"
b.reset_index(inplace=True)
e = pd.DataFrame(pd.DataFrame.mean(results))
e.columns = ['Even']
e.index.name = "Strategy"
e.reset_index(inplace=True)

merged = pd.merge(b,e,on="Strategy")
merged['Difference'] = merged['Bandit'] - merged['Even']
merged['Percentage'] = merged['Bandit']/merged['Even'] - 1
merged

Out[26]:
Strategy Bandit Even Difference Percentage
0 cheat 11777.088 8422.771 3354.317 0.398244
1 chi2_0.01 7786.134 7699.615 86.519 0.011237
2 chi2_0.05 8574.060 7896.281 677.779 0.085835
3 chi2_0.10 8734.480 8049.524 684.956 0.085093
4 chi2_0.20 9493.828 8199.191 1294.637 0.157898
5 gtest_0.01 7958.364 7655.258 303.106 0.039594
6 gtest_0.05 8579.690 7950.070 629.620 0.079197
7 gtest_0.10 8975.440 7967.160 1008.280 0.126555
8 gtest_0.20 9536.533 8131.526 1405.007 0.172785
9 nothing 6234.972 4738.289 1496.683 0.315870
10 pick_best 11487.823 8287.084 3200.739 0.386232
11 random 5904.835 4723.216 1181.619 0.250173
In [57]:
df['Rotation'] = 'Even'
bandit_df['Rotation'] = 'Optimise'
bandit_df.rename(columns={"variable":"Strategy"}, inplace=True)

plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy", y="value", hue="Rotation", data=pd.concat([df, bandit_df],ignore_index=True))
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()

In [28]:
n_ads = [("02", lambda s: pick_best(2, s)),
("03", lambda s: pick_best(3, s)),
("04", lambda s: pick_best(4, s)),
("05", lambda s: pick_best(5, s)),
("06", lambda s: pick_best(6, s)),
("07", lambda s: pick_best(7, s)),
("08", lambda s: pick_best(8, s)),
("09", lambda s: pick_best(9, s)),
("10", lambda s: pick_best(10, s))
]

r["row"] = r.index % 1000
else:
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playGame(game,52)
res.append(c)

In [29]:
x = pd.DataFrame(pd.DataFrame.mean(n_ads_results))
x.columns = ['Clicks']
x.reset_index(inplace=True)

x

Out[29]:
0 02 8328.438
1 03 7555.274
2 04 7087.282
3 05 6681.802
4 06 6392.498
5 07 6217.475
6 08 6043.088
7 09 5900.112
8 10 5802.490
9 cheat 8361.216
In [49]:
n_ads_df = pd.melt(n_ads_results)
plt.figure(figsize=(12,8))
sns.plt.show()

In [31]:
if os.path.isfile("nads_bandit1.csv"):
r["row"] = r.index % 1000
else:
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playEGGame(game,52)
res.append(c)

In [32]:
x = pd.DataFrame(pd.DataFrame.mean(n_ads_bandit_results))
x.columns = ['Clicks']
x.reset_index(inplace=True)

x

Out[32]:
0 02 11329.121
1 03 12573.173
2 04 13058.768
3 05 13423.703
4 06 13702.728
5 07 13895.402
6 08 14163.068
7 09 14142.196
8 10 14374.204
9 cheat 11720.622

With a bandit, adding more ads is very helpful. Throw shit against the wall and let the algorithm sort it out

In [59]:
n_ads_bandit_df = pd.melt(pd.DataFrame(n_ads_bandit_results))
plt.figure(figsize=(12,8))
sns.plt.show()

In [34]:
def play100ImpressionsGame(strategy,turns):
s = ClicksGame()
s.weeklyimpressions = 100
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())

results_100Impressions = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = play100ImpressionsGame(game,52)
res.append(c)
results_100Impressions[label] = res

results_100Impressions = pd.DataFrame(results_100Impressions)


In [60]:
df_100impressions = pd.melt(results_100Impressions)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_100impressions)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()

In [61]:
def play50ImpressionsGame(strategy,turns):
s = ClicksGame()
s.weeklyimpressions = 50
while s.state['turns'] < turns:
strategy(s)
return(s.total_clicks())

results_50Impressions = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = play50ImpressionsGame(game,52)
res.append(c)
results_50Impressions[label] = res

results_50Impressions = pd.DataFrame(results_50Impressions)

df_50impressions = pd.melt(results_50Impressions)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_50impressions)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()


# Maximise End Result¶

In [64]:
def playEndResultGame(strategy,turns):
s = ClicksGame()
while s.state['turns'] < turns:
strategy(s)

In [66]:
end_results = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c,n = playEndResultGame(game,10)
res.append(c)
end_results[label] = res


In [62]:
end_results = pd.DataFrame(end_results)

df_end_results = pd.melt(end_results)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_end_results)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()

In [71]:
pd.DataFrame.mean(pd.DataFrame(end_results_n_ads))

Out[71]:
cheat         11.000
chi2_0.01      6.221
chi2_0.05      7.041
chi2_0.10      7.604
chi2_0.20      8.212
gtest_0.01     6.188
gtest_0.05     6.901
gtest_0.10     7.514
gtest_0.20     8.219
nothing        2.000
pick_best     11.000
random        11.000
dtype: float64
In [46]:
def playEndResult100ImpressionsGame(strategy,turns):
s = ClicksGame()
s.weeklyimpressions = 100
while s.state['turns'] < turns:
strategy(s)
return(best)

end_results_100 = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playEndResult100ImpressionsGame(game,10)
res.append(c)
end_results_100[label] = res

end_results_100 = pd.DataFrame(end_results_100)

df_end_results_100 = pd.melt(end_results_100)
plt.figure(figsize=(15,15))
p = sns.boxplot(x="variable", y="value", data=df_end_results_100)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()


In [76]:
def playRestrictedVariationsGame(strategy,ads):
s = ClicksGame()
strategy(s)
return(best)

restricted_variations = {}

for (label,game) in tqdm_notebook(games[1:],desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playRestrictedVariationsGame(game,5)
res.append(c)
restricted_variations[label] = res

restricted_variations = pd.DataFrame(restricted_variations)

restricted_variations_df = pd.melt(restricted_variations)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=restricted_variations_df)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()

744/|/ 74%|| 744/1000 [01:39<00:34,  7.47it/s]
456/|/ 46%|| 456/1000 [00:29<00:35, 15.31it/s]
422/|/ 42%|| 422/1000 [00:17<00:23, 24.33it/s]
581/|/ 58%|| 581/1000 [01:58<01:25,  4.89it/s]

In [87]:
def playRestrictedVariationsGame50(strategy,ads):
s = ClicksGame()
s.weeklyimpressions = 100
strategy(s)
return(best)

restricted_variations_50 = {}

for (label,game) in tqdm_notebook(games[1:],desc="Strategy"):
res = []
for i in tqdm_notebook(range(1000), desc=label, leave=False):
c = playRestrictedVariationsGame(game,5)
res.append(c)
restricted_variations_50[label] = res

restricted_variations_50 = pd.DataFrame(restricted_variations_50)

restricted_variations_50_df = pd.melt(restricted_variations)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=restricted_variations_50_df)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()

805/|/ 80%|| 805/1000 [7:03:20<1:42:33, 31.55s/it]
931/|/ 93%|| 931/1000 [00:56<00:04, 16.35it/s]
669/|/ 67%|| 669/1000 [00:17<00:08, 37.30it/s]
836/|/ 84%|| 836/1000 [01:14<00:14, 11.24it/s]

In [ ]: