A Game of Ads

Start with some imports.

Scipy for some statistics, numpy for random number generation and pandas for the data frames

In [4]:
import scipy.stats as stats
import numpy as np
import pandas as pd

The ClicksGame class implements all the state for the game

In [5]:
class ClicksGame:
    # alpha and beta determine the distribution from which
    # new click through rates are drawn. 
    # alpha=5, beta=100 will give a very narrow distribution
    # with little variance in click through rates.
    # alpha=2, beta=20 is a little fatter 
    alpha = 2
    beta = 20
    
    # weeklyimpressions is the number of trials per week in total
    # The trials are distributed between the ads according to the
    # rotation method
    weeklyimpressions = 1000
    
    # The starting state - no ads.
    state = {}
        
    def __init__(self, rotation='even', epsilon=0.1):
        if rotation == 'even':
            self.rotation = self.evenAllocation
        elif rotation == 'optimise':
            self.rotation = self.epsilonGreedy
            self.epsilon = epsilon
        self.state = {'active_ads': {},
                      'paused_ads': {},
                      'n_ads': 0,
                      'turns': 0,
                     }
    def take_turn(self):
        self.rotation()
    
    def n_ads(self):
        return(self.state['n_ads'])
        
    def n_active_ads(self):
        return(len(self.state['active_ads']))
    
        
    def addAdvert(self):
        # Adds an advert.
        # First randomly generate the click through rate
        ctr = np.random.beta(self.alpha,self.beta,1)[0]
        ad = {'impressions':0,
              'clicks':0,
              'trueCtr': ctr
             }
        n_ads = self.state['n_ads']
        self.state['active_ads'][n_ads+1] = ad
        self.state['n_ads'] += 1
        
    def pauseAdvert(self,i):
        # Pause advert with id i.
        if i in self.state['active_ads']:
            ad = self.state['active_ads'].pop(i)
            self.state['paused_ads'][i] = ad
            
    def total_clicks(self):
        clicks = 0
        for i in self.state['active_ads']:
            clicks += self.state['active_ads'][i]['clicks']
        for i in self.state['paused_ads']:
            clicks += self.state['paused_ads'][i]['clicks']
        return(clicks)
            
    def evenAllocation(self):
        # Randomly allocate impressions between all active adverts
        self.state['turns'] += 1
        if self.state['active_ads']:
            n_active = len(self.state['active_ads'])
            impressions = np.random.multinomial(self.weeklyimpressions, 
                                                [1/n_active]*n_active,
                                                size=1
                                                )
            impressions = np.ndarray.tolist(impressions)[0]
            for adix in self.state['active_ads']:
                ad = self.state['active_ads'][adix]
                imps = impressions.pop()
                clicks = np.random.binomial(imps, ad['trueCtr'], 1)[0]
                self.state['active_ads'][adix]['impressions'] += imps
                self.state['active_ads'][adix]['clicks'] += clicks
                
    def epsilonGreedy(self):
        # Epsilon greedy bandit allocation
        # Allocated self.epsilon percent of impressions randomly
        # The remaining 1-self.epsilon percent are given to the ad 
        # with the best observed ctr.
        #
        # N.B. This allocation method takes a lot longer to run than 
        # even allocation. This will always be the case I think but this
        # isn't to say that the below code can't be made much quicker.
        self.state['turns'] += 1
        if self.state['active_ads']:
            for i in range(self.weeklyimpressions):
                if np.random.uniform(0,1,1)[0] < self.epsilon:
                    #explore
                    adix = np.random.choice(list(self.state['active_ads'].keys()),1)[0]
                    self.state['active_ads'][adix]['impressions'] += 1
                    click = np.random.uniform(0,1,1)[0] < self.state['active_ads'][adix]['trueCtr']
                    self.state['active_ads'][adix]['clicks'] += click
                else:
                    #exploit
                    s = self.state
                    ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] 
                            if s['active_ads'][x]['impressions'] is not 0 else 0
                            for x in s['active_ads']]
                    maxctr = max(ctrs)
                    for adix in s['active_ads']:
                        if s['active_ads'][adix]['impressions'] == 0 or s['active_ads'][adix]['clicks']/s['active_ads'][adix]['impressions'] < maxctr:
                            pass
                        else:
                            self.state['active_ads'][adix]['impressions'] += 1
                            click = np.random.uniform(0,1,1)[0] < self.state['active_ads'][adix]['trueCtr']
                            self.state['active_ads'][adix]['clicks'] += click
            

Quick demonstration game where we add two adverts and then run a week:

In [6]:
c = ClicksGame(rotation='optimise')
c.addAdvert()
c.addAdvert()
c.take_turn()
c.state
Out[6]:
{'active_ads': {1: {'clicks': 88,
   'impressions': 779,
   'trueCtr': 0.10795954112418148},
  2: {'clicks': 15, 'impressions': 214, 'trueCtr': 0.088679615145492843}},
 'n_ads': 2,
 'paused_ads': {},
 'turns': 1}

Now start to define some different ad test strategies.

The simplest is to add two adverts and then do nothing.

In [7]:
def doNothing(c):
    while c.n_active_ads() < 2:
        c.addAdvert()
    c.take_turn()
In [8]:
c = ClicksGame()
doNothing(c)
doNothing(c)
c.state
Out[8]:
{'active_ads': {1: {'clicks': 82,
   'impressions': 1011,
   'trueCtr': 0.083759471000148675},
  2: {'clicks': 82, 'impressions': 989, 'trueCtr': 0.072707733964301746}},
 'n_ads': 2,
 'paused_ads': {},
 'turns': 2}

A strategy which is impossible in real life but which is possible in the game is to cheat by looking at the true click through rate and picking the advert that is best.

It is useful to have this strategy as a comparison; any other strategy that does nearly as well as cheating must be pretty good.

In [9]:
def cheat(c):
    while c.n_active_ads() < 2:
        c.addAdvert()
    c.take_turn()
    s = c.state
    ctrs = [s['active_ads'][x]['trueCtr'] for x in s['active_ads']]
    maxctr = max(ctrs)
    to_pause = []
    for i in s['active_ads']:
        if s['active_ads'][i]['trueCtr'] < maxctr:
            to_pause.append(i)
    for i in to_pause:
        c.pauseAdvert(i)
In [10]:
c = ClicksGame()
while c.state['turns'] < 5:
    cheat(c)
    
c.state
Out[10]:
{'active_ads': {5: {'clicks': 204,
   'impressions': 1004,
   'trueCtr': 0.18339487222248058}},
 'n_ads': 6,
 'paused_ads': {1: {'clicks': 21,
   'impressions': 504,
   'trueCtr': 0.042531824031978249},
  2: {'clicks': 118, 'impressions': 1484, 'trueCtr': 0.07558811885068413},
  3: {'clicks': 23, 'impressions': 519, 'trueCtr': 0.052936233078316852},
  4: {'clicks': 102, 'impressions': 982, 'trueCtr': 0.080658421522488699},
  6: {'clicks': 40, 'impressions': 507, 'trueCtr': 0.092294268564702811}},
 'turns': 5}

Another strawman strategy is to pick adverts to pause at random.

In [11]:
def random(c):
    while c.n_active_ads() < 2:
        c.addAdvert()
    c.take_turn()
    s = c.state
    ads = list(s['active_ads'].keys())
    ad_to_pause = np.random.choice(ads,1)[0]
    c.pauseAdvert(ad_to_pause)
In [12]:
s = ClicksGame()
while s.state['turns'] < 5:
    random(s)
    
s.state
Out[12]:
{'active_ads': {6: {'clicks': 111,
   'impressions': 501,
   'trueCtr': 0.1914446524341035}},
 'n_ads': 6,
 'paused_ads': {1: {'clicks': 154,
   'impressions': 2439,
   'trueCtr': 0.065348580171771239},
  2: {'clicks': 22, 'impressions': 497, 'trueCtr': 0.045655369615622714},
  3: {'clicks': 59, 'impressions': 526, 'trueCtr': 0.099652566043553659},
  4: {'clicks': 36, 'impressions': 528, 'trueCtr': 0.069917814742279122},
  5: {'clicks': 177, 'impressions': 509, 'trueCtr': 0.33433599714983936}},
 'turns': 5}

A more common strategy is to do nothing until a chi-squared test has a p-value below a certain threshold.

The pick the ad with the best observed ctr.

In [13]:
def chi2(pvalue,c):
    while c.n_active_ads() < 2:
        c.addAdvert()
    c.take_turn()
    clicks = c.total_clicks()
    if clicks > 0:
        obs = []
        for i in c.state['active_ads']:
            ad = c.state['active_ads'][i]
            obs.append([ad['clicks'], ad['impressions']-ad['clicks']])
        p = stats.chi2_contingency(obs)[1]
        if p < pvalue:
            s = c.state
            ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] for x in s['active_ads']]
            maxctr = max(ctrs)
            to_pause = []
            for i in s['active_ads']:
                ad = s['active_ads'][i]
                obs_ctr = ad['clicks']/ad['impressions']
                if obs_ctr < maxctr:
                    to_pause.append(i)
            for i in to_pause:
                c.pauseAdvert(i)        
    
In [14]:
s = ClicksGame()
while s.state['turns'] < 10:
    chi2(0.1,s)
    
s.state
Out[14]:
{'active_ads': {2: {'clicks': 824,
   'impressions': 5006,
   'trueCtr': 0.16606703563697534},
  5: {'clicks': 590, 'impressions': 3490, 'trueCtr': 0.16131558839301935}},
 'n_ads': 5,
 'paused_ads': {1: {'clicks': 17,
   'impressions': 507,
   'trueCtr': 0.037307833755827212},
  3: {'clicks': 19, 'impressions': 520, 'trueCtr': 0.029859760520460937},
  4: {'clicks': 37, 'impressions': 477, 'trueCtr': 0.080969023930524034}},
 'turns': 10}

The G Test is very similar to chi-squared. Some people consider it more appropriate for this kind of problem

In [15]:
def g_test(pvalue,c):
    while c.n_active_ads() < 2:
        c.addAdvert()
    c.take_turn()
    clicks = c.total_clicks()
    if clicks > 0:
        obs = []
        for i in c.state['active_ads']:
            ad = c.state['active_ads'][i]
            obs.append([ad['clicks'], ad['impressions']-ad['clicks']])
        p = stats.chi2_contingency(obs,lambda_="log-likelihood")[1]
        if p < pvalue:
            s = c.state
            ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] for x in s['active_ads']]
            maxctr = max(ctrs)
            to_pause = []
            for i in s['active_ads']:
                ad = s['active_ads'][i]
                obs_ctr = ad['clicks']/ad['impressions']
                if obs_ctr < maxctr:
                    to_pause.append(i)
            for i in to_pause:
                c.pauseAdvert(i)              

An easy thing to do is to just pick the best advert and pause all the others.

In [16]:
def pick_best(n_adverts,c):
    while c.n_active_ads() < n_adverts:
        c.addAdvert()
    c.take_turn()
    s = c.state
    ctrs = [s['active_ads'][x]['clicks']/s['active_ads'][x]['impressions'] if s['active_ads'][x]['impressions'] is not 0 else 0 for x in s['active_ads']]
    maxctr = max(ctrs)
    to_pause = []
    for i in s['active_ads']:
        ad = s['active_ads'][i]
        obs_ctr = ad['clicks']/ad['impressions'] if ad['impressions'] is not 0 else 0
        if obs_ctr < maxctr:
            to_pause.append(i)
    for i in to_pause:
        c.pauseAdvert(i)         
    

The tqdm package provides nice progress bars.

In [17]:
from tqdm import tnrange,tqdm_notebook

playGame runs a strategy for a specified number of turns using even ad rotation

In [18]:
def playGame(strategy,turns):
    s = ClicksGame()
    while s.state['turns'] < turns:
        strategy(s)
    return(s.total_clicks())
In [19]:
games = [
    ('nothing',doNothing),
    ('cheat', cheat),
    ('random', random),
    ('chi2_0.01', lambda s: chi2(0.01,s)),
    ('chi2_0.05', lambda s: chi2(0.05,s)),
    ('chi2_0.10', lambda s: chi2(0.1,s)),
    ('chi2_0.20', lambda s: chi2(0.2,s)),
    ('gtest_0.01', lambda s: g_test(0.01,s)),
    ('gtest_0.05', lambda s: g_test(0.05,s)),
    ('gtest_0.10', lambda s: g_test(0.1,s)),
    ('gtest_0.20', lambda s: g_test(0.2,s)),
    ('pick_best', lambda s: pick_best(2,s))
]

Play all games

In [20]:
import os

if os.path.isfile("rotate1.csv"):
    r = pd.read_csv("rotate1.csv",index_col=0)
    r["row"] = r.index % 1000
    results = r.pivot(index="row",columns="variable",values="value")
else:
    results = {}
    for (label,game) in tqdm_notebook(games,desc="Strategy"):
        res = []
        for i in tqdm_notebook(range(1000), desc=label, leave=False):
            c = playGame(game,52)
            res.append(c)
        results[label] = res
    results = pd.DataFrame(results)
In [21]:
x = pd.DataFrame(pd.DataFrame.mean(results))
x.columns = ['Clicks']
x.index.name = "Strategy"
x.reset_index(inplace=True)

x
Out[21]:
Strategy Clicks
0 cheat 8422.771
1 chi2_0.01 7699.615
2 chi2_0.05 7896.281
3 chi2_0.10 8049.524
4 chi2_0.20 8199.191
5 gtest_0.01 7655.258
6 gtest_0.05 7950.070
7 gtest_0.10 7967.160
8 gtest_0.20 8131.526
9 nothing 4738.289
10 pick_best 8287.084
11 random 4723.216

Cheat performs the best (obviously). Out of all the other strategies, the general trend is that higher p-values perform better and best of all is to just pick the ad with the highest observed ctr.

In [22]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
In [47]:
df = pd.melt(results)
if not os.path.isfile("rotate1.csv"):
    df.to_csv("rotate1.csv")
plt.figure(figsize=(24,16))
p = sns.boxplot(x="Strategy", y="value", data=df)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()

Does using the epsilon-greedy algorithm to optimise ad rotation make a difference?

(This code takes a while to run - nearly 3 hours on my machine).

In [24]:
def playEGGame(strategy,turns):
    s = ClicksGame(rotation='optimise')
    while s.state['turns'] < turns:
        strategy(s)
    return(s.total_clicks())

if os.path.isfile("bandit1.csv"):
    r = pd.read_csv("bandit1.csv",index_col=0)
    r["row"] = r.index % 1000
    bandit_results = r.pivot(index="row",columns="variable",values="value")
else:
    bandit_results = {}
    for (label,game) in tqdm_notebook(games,desc="Strategy"):
        res = []
        for i in tqdm_notebook(range(1000), desc=label, leave=False):
            c = playEGGame(game,52)
            res.append(c)
        bandit_results[label] = res
    bandit_results = pd.DataFrame(bandit_results)
In [25]:
bandit_df = pd.melt(bandit_results)
if not os.path.isfile("bandit1.csv"):
    bandit_df.to_csv("bandit1.csv")
plt.figure(figsize=(15,15))
p = sns.boxplot(x="Strategy", y="value", data=df)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
In [ ]:
 
In [26]:
b = pd.DataFrame(pd.DataFrame.mean(bandit_results))
b.columns = ['Bandit']
b.index.name = "Strategy"
b.reset_index(inplace=True)
e = pd.DataFrame(pd.DataFrame.mean(results))
e.columns = ['Even']
e.index.name = "Strategy"
e.reset_index(inplace=True)

merged = pd.merge(b,e,on="Strategy")
merged['Difference'] = merged['Bandit'] - merged['Even']
merged['Percentage'] = merged['Bandit']/merged['Even'] - 1
merged
Out[26]:
Strategy Bandit Even Difference Percentage
0 cheat 11777.088 8422.771 3354.317 0.398244
1 chi2_0.01 7786.134 7699.615 86.519 0.011237
2 chi2_0.05 8574.060 7896.281 677.779 0.085835
3 chi2_0.10 8734.480 8049.524 684.956 0.085093
4 chi2_0.20 9493.828 8199.191 1294.637 0.157898
5 gtest_0.01 7958.364 7655.258 303.106 0.039594
6 gtest_0.05 8579.690 7950.070 629.620 0.079197
7 gtest_0.10 8975.440 7967.160 1008.280 0.126555
8 gtest_0.20 9536.533 8131.526 1405.007 0.172785
9 nothing 6234.972 4738.289 1496.683 0.315870
10 pick_best 11487.823 8287.084 3200.739 0.386232
11 random 5904.835 4723.216 1181.619 0.250173
In [57]:
df['Rotation'] = 'Even'
bandit_df['Rotation'] = 'Optimise'
bandit_df.rename(columns={"variable":"Strategy"}, inplace=True)

plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy", y="value", hue="Rotation", data=pd.concat([df, bandit_df],ignore_index=True))
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
In [28]:
n_ads = [("02", lambda s: pick_best(2, s)),
         ("03", lambda s: pick_best(3, s)),
         ("04", lambda s: pick_best(4, s)),
         ("05", lambda s: pick_best(5, s)),
         ("06", lambda s: pick_best(6, s)),
         ("07", lambda s: pick_best(7, s)),
         ("08", lambda s: pick_best(8, s)),
         ("09", lambda s: pick_best(9, s)),
         ("10", lambda s: pick_best(10, s))
        ]
n_ads.append(("cheat",cheat))

if os.path.isfile("nads1.csv"):
    r = pd.read_csv("nads1.csv",index_col=0)
    r["row"] = r.index % 1000
    n_ads_results = r.pivot(index="row",columns="variable",values="value")
else:
    n_ads_results = {}
    for (label,game) in tqdm_notebook(n_ads,desc="Strategy"):
        res = []
        for i in tqdm_notebook(range(1000), desc=label, leave=False):
            c = playGame(game,52)
            res.append(c)
        n_ads_results[label] = res
    n_ads_results = pd.DataFrame(n_ads_results)
In [29]:
x = pd.DataFrame(pd.DataFrame.mean(n_ads_results))
x.columns = ['Clicks']
x.index.name = "Strategy/No. of ads"
x.reset_index(inplace=True)

x
Out[29]:
Strategy/No. of ads Clicks
0 02 8328.438
1 03 7555.274
2 04 7087.282
3 05 6681.802
4 06 6392.498
5 07 6217.475
6 08 6043.088
7 09 5900.112
8 10 5802.490
9 cheat 8361.216
In [49]:
n_ads_df = pd.melt(n_ads_results)
if not os.path.isfile("nads1.csv"):
    n_ads_df.to_csv("nads1.csv")
plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy/No. of ads", y="value", data=n_ads_df)
p.set(xlabel="Number of ads", ylabel="Clicks")
sns.plt.show()
In [31]:
if os.path.isfile("nads_bandit1.csv"):
    r = pd.read_csv("nads_bandit1.csv",index_col=0)
    r["row"] = r.index % 1000
    n_ads_bandit_results = r.pivot(index="row",columns="variable",values="value")
else:
    n_ads_bandit_results = {}
    for (label,game) in tqdm_notebook(n_ads,desc="Strategy"):
        res = []
        for i in tqdm_notebook(range(1000), desc=label, leave=False):
            c = playEGGame(game,52)
            res.append(c)
        n_ads_bandit_results[label] = res
    n_ads_bandit_results = pd.DataFrame(n_ads_bandit_results)
In [32]:
x = pd.DataFrame(pd.DataFrame.mean(n_ads_bandit_results))
x.columns = ['Clicks']
x.index.name = "Strategy/No. of ads"
x.reset_index(inplace=True)

x
Out[32]:
Strategy/No. of ads Clicks
0 02 11329.121
1 03 12573.173
2 04 13058.768
3 05 13423.703
4 06 13702.728
5 07 13895.402
6 08 14163.068
7 09 14142.196
8 10 14374.204
9 cheat 11720.622

With a bandit, adding more ads is very helpful. Throw shit against the wall and let the algorithm sort it out

In [59]:
n_ads_bandit_df = pd.melt(pd.DataFrame(n_ads_bandit_results))
if not os.path.isfile("nads_bandit1.csv"):
    n_ads_bandit_df.to_csv("nads_bandit1.csv")
plt.figure(figsize=(12,8))
p = sns.boxplot(x="Strategy/No. of ads", y="value", data=n_ads_bandit_df)
p.set(xlabel="Number of Adverts", ylabel="Clicks")
sns.plt.show()
In [34]:
def play100ImpressionsGame(strategy,turns):
    s = ClicksGame()
    s.weeklyimpressions = 100
    while s.state['turns'] < turns:
        strategy(s)
    return(s.total_clicks())

results_100Impressions = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
    res = []
    for i in tqdm_notebook(range(1000), desc=label, leave=False):
        c = play100ImpressionsGame(game,52)
        res.append(c)
    results_100Impressions[label] = res

results_100Impressions = pd.DataFrame(results_100Impressions)

In [60]:
df_100impressions = pd.melt(results_100Impressions)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_100impressions)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()
In [61]:
def play50ImpressionsGame(strategy,turns):
    s = ClicksGame()
    s.weeklyimpressions = 50
    while s.state['turns'] < turns:
        strategy(s)
    return(s.total_clicks())

results_50Impressions = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
    res = []
    for i in tqdm_notebook(range(1000), desc=label, leave=False):
        c = play50ImpressionsGame(game,52)
        res.append(c)
    results_50Impressions[label] = res

results_50Impressions = pd.DataFrame(results_50Impressions)

df_50impressions = pd.melt(results_50Impressions)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_50impressions)
p.set(xlabel="Method", ylabel="Clicks")
sns.plt.show()

Maximise End Result

In [64]:
def playEndResultGame(strategy,turns):
    s = ClicksGame()
    while s.state['turns'] < turns:
        strategy(s)
    ads = s.state['active_ads']
    best = max([ads[i]['trueCtr'] for i in ads])
    return(best,s.state["n_ads"])
In [66]:
end_results = {}
end_results_n_ads = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
    res = []
    n_ads = []
    for i in tqdm_notebook(range(1000), desc=label, leave=False):
        c,n = playEndResultGame(game,10)
        res.append(c)
        n_ads.append(n)
    end_results[label] = res
    end_results_n_ads[label] = n_ads

In [62]:
end_results = pd.DataFrame(end_results)

df_end_results = pd.melt(end_results)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=df_end_results)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()
In [71]:
pd.DataFrame.mean(pd.DataFrame(end_results_n_ads))
Out[71]:
cheat         11.000
chi2_0.01      6.221
chi2_0.05      7.041
chi2_0.10      7.604
chi2_0.20      8.212
gtest_0.01     6.188
gtest_0.05     6.901
gtest_0.10     7.514
gtest_0.20     8.219
nothing        2.000
pick_best     11.000
random        11.000
dtype: float64
In [46]:
def playEndResult100ImpressionsGame(strategy,turns):
    s = ClicksGame()
    s.weeklyimpressions = 100
    while s.state['turns'] < turns:
        strategy(s)
    ads = s.state['active_ads']
    best = max([ads[i]['trueCtr'] for i in ads])
    return(best)

end_results_100 = {}

for (label,game) in tqdm_notebook(games,desc="Strategy"):
    res = []
    for i in tqdm_notebook(range(1000), desc=label, leave=False):
        c = playEndResult100ImpressionsGame(game,10)
        res.append(c)
    end_results_100[label] = res
    
end_results_100 = pd.DataFrame(end_results_100)

df_end_results_100 = pd.melt(end_results_100)
plt.figure(figsize=(15,15))
p = sns.boxplot(x="variable", y="value", data=df_end_results_100)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()

In [76]:
def playRestrictedVariationsGame(strategy,ads):
    s = ClicksGame()
    while s.state['n_ads'] < ads:
        strategy(s)
    ads = s.state['active_ads']
    best = max([ads[i]['trueCtr'] for i in ads])
    return(best)

restricted_variations = {}

for (label,game) in tqdm_notebook(games[1:],desc="Strategy"):
    res = []
    for i in tqdm_notebook(range(1000), desc=label, leave=False):
        c = playRestrictedVariationsGame(game,5)
        res.append(c)
    restricted_variations[label] = res
    
restricted_variations = pd.DataFrame(restricted_variations)

restricted_variations_df = pd.melt(restricted_variations)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=restricted_variations_df)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()
744/|/ 74%|| 744/1000 [01:39<00:34,  7.47it/s]
456/|/ 46%|| 456/1000 [00:29<00:35, 15.31it/s]
422/|/ 42%|| 422/1000 [00:17<00:23, 24.33it/s]
581/|/ 58%|| 581/1000 [01:58<01:25,  4.89it/s]

In [87]:
def playRestrictedVariationsGame50(strategy,ads):
    s = ClicksGame()
    s.weeklyimpressions = 100
    while s.state['n_ads'] < ads and s.state['turns'] < 50:
        strategy(s)
    ads = s.state['active_ads']
    best = max([ads[i]['trueCtr'] for i in ads])
    return(best)

restricted_variations_50 = {}

for (label,game) in tqdm_notebook(games[1:],desc="Strategy"):
    res = []
    for i in tqdm_notebook(range(1000), desc=label, leave=False):
        c = playRestrictedVariationsGame(game,5)
        res.append(c)
    restricted_variations_50[label] = res
    
restricted_variations_50 = pd.DataFrame(restricted_variations_50)

restricted_variations_50_df = pd.melt(restricted_variations)
plt.figure(figsize=(12,8))
p = sns.boxplot(x="variable", y="value", data=restricted_variations_50_df)
p.set(xlabel="Method", ylabel="Final CTR")
sns.plt.show()
805/|/ 80%|| 805/1000 [7:03:20<1:42:33, 31.55s/it]
931/|/ 93%|| 931/1000 [00:56<00:04, 16.35it/s]
669/|/ 67%|| 669/1000 [00:17<00:08, 37.30it/s]
836/|/ 84%|| 836/1000 [01:14<00:14, 11.24it/s]

In [ ]: