From ad70f4da736bcfcabb51859fc165f60067173db6 Mon Sep 17 00:00:00 2001 From: Will McGinnis Date: Sun, 29 Mar 2020 17:23:58 -0400 Subject: [PATCH] tests for blended competitor and API consistency --- docs/source/arenas.rst | 17 +++++++ docs/source/competitors.rst | 25 ++++++++--- elote/arenas/base.py | 78 +++++++++++++++++++++++++++++++-- elote/competitors/dwz.py | 29 +++++++----- elote/competitors/elo.py | 18 +++++--- elote/competitors/ensemble.py | 23 ++++++---- elote/competitors/glicko.py | 22 +++++++--- tests/test_BlendedCompetitor.py | 72 ++++++++++++++++++++++++++++++ 8 files changed, 245 insertions(+), 39 deletions(-) create mode 100644 tests/test_BlendedCompetitor.py diff --git a/docs/source/arenas.rst b/docs/source/arenas.rst index eb6bbf7..4a93cf8 100644 --- a/docs/source/arenas.rst +++ b/docs/source/arenas.rst @@ -1,6 +1,23 @@ Arenas ====== +Arenas are objects that manage populations of competitors and their matchups. Currently there is only one +type of arena implemented: LambdaArena. + +Lambda Arena +------------ .. autoclass:: elote.arenas.lambda_arena.LambdaArena :members: + + +Helpers +------- + +.. autoclass:: elote.arenas.base.History + :members: + +.. autoclass:: elote.arenas.base.Bout + :members: + + diff --git a/docs/source/competitors.rst b/docs/source/competitors.rst index 56807e2..6c5c427 100644 --- a/docs/source/competitors.rst +++ b/docs/source/competitors.rst @@ -1,17 +1,32 @@ Competitors =========== +Elo Competitor +-------------- + .. autoclass:: elote.competitors.elo.EloCompetitor - :members: export_state,expected_score,beat,tied + :members: export_state,expected_score,beat,tied,rating + +Glicko Competitor +----------------- .. autoclass:: elote.competitors.glicko.GlickoCompetitor - :members: export_state,expected_score,beat,tied + :members: export_state,expected_score,beat,tied,rating + +DWZ Competitor +-------------- .. 
autoclass:: elote.competitors.dwz.DWZCompetitor - :members: export_state,expected_score,beat,tied + :members: export_state,expected_score,beat,tied,rating + +ECF Competitor +-------------- .. autoclass:: elote.competitors.ecf.ECFCompetitor - :members: export_state,expected_score,beat,tied + :members: export_state,expected_score,beat,tied,rating + +BlendedCompetitor +----------------- .. autoclass:: elote.competitors.ensemble.BlendedCompetitor - :members: export_state,expected_score,beat,tied + :members: export_state,expected_score,beat,tied,rating diff --git a/elote/arenas/base.py b/elote/arenas/base.py index d96b1b6..bac0f33 100644 --- a/elote/arenas/base.py +++ b/elote/arenas/base.py @@ -26,12 +26,26 @@ def export_state(self): class History: def __init__(self): + """ + + """ self.bouts = [] def add_bout(self, bout): + """ + + :param bout: + :return: + """ self.bouts.append(bout) def report_results(self, lower_threshold=0.5, upper_threshold=0.5): + """ + + :param lower_threshold: + :param upper_threshold: + :return: + """ report = list() for bout in self.bouts: report.append({ @@ -44,6 +58,13 @@ def report_results(self, lower_threshold=0.5, upper_threshold=0.5): return report def confusion_matrix(self, lower_threshold=0.5, upper_threshold=0.5, attribute_filter=None): + """ + + :param lower_threshold: + :param upper_threshold: + :param attribute_filter: + :return: + """ tp, fp, tn, fn, do_nothing = 0, 0, 0, 0, 0 for bout in self.bouts: match = True @@ -67,6 +88,11 @@ def confusion_matrix(self, lower_threshold=0.5, upper_threshold=0.5, attribute_f return tp, fp, tn, fn, do_nothing def random_search(self, trials=1000): + """ + + :param trials: + :return: + """ best_net, best_thresholds = 0, list() for _ in range(trials): thresholds = sorted([random.random(), random.random()]) @@ -80,6 +106,14 @@ def random_search(self, trials=1000): class Bout: def __init__(self, a, b, predicted_outcome, outcome, attributes=None): + """ + + :param a: + :param b: + :param 
predicted_outcome: + :param outcome: + :param attributes: + """ self.a = a self.b = b self.predicted_outcome = predicted_outcome @@ -87,46 +121,82 @@ def __init__(self, a, b, predicted_outcome, outcome, attributes=None): self.attributes = attributes or dict() def true_positive(self, threshold=0.5): + """ + + :param threshold: + :return: + """ if self.predicted_outcome > threshold and self.outcome == 'win': return True else: return False def false_positive(self, threshold=0.5): + """ + + :param threshold: + :return: + """ if self.predicted_outcome > threshold and self.outcome != 'win': return True else: return False def true_negative(self, threshold=0.5): + """ + + :param threshold: + :return: + """ if self.predicted_outcome <= threshold and self.outcome == 'loss': return True else: return False def false_negative(self, threshold=0.5): + """ + + :param threshold: + :return: + """ if self.predicted_outcome <= threshold and self.outcome != 'loss': return True else: return False - def predicted_winner(self, lower_treshold=0.5, upper_threshold=0.5): + def predicted_winner(self, lower_threshold=0.5, upper_threshold=0.5): + """ + + :param lower_threshold: + :param upper_threshold: + :return: + """ if self.predicted_outcome > upper_threshold: return self.a - elif self.predicted_outcome < lower_treshold: + elif self.predicted_outcome < lower_threshold: return self.b else: return None - def predicted_loser(self, lower_treshold=0.5, upper_threshold=0.5): + def predicted_loser(self, lower_threshold=0.5, upper_threshold=0.5): + """ + + :param lower_threshold: + :param upper_threshold: + :return: + """ if self.predicted_outcome > upper_threshold: return self.b - elif self.predicted_outcome < lower_treshold: + elif self.predicted_outcome < lower_threshold: return self.a else: return None def actual_winner(self): + """ + + :return: + """ if self.outcome == 'win': return self.a elif self.outcome == 'loss': diff --git a/elote/competitors/dwz.py b/elote/competitors/dwz.py index 
270667d..b4a05e5 100644 --- a/elote/competitors/dwz.py +++ b/elote/competitors/dwz.py @@ -15,7 +15,7 @@ class vars: :param initial_rating: the initial rating to use for a new competitor who has no history. Default 400 """ self._count = 0 - self.rating = initial_rating + self._rating = initial_rating def __repr__(self): return '' % (self.__hash__()) @@ -23,6 +23,14 @@ def __repr__(self): def __str__(self): return '' + @property + def rating(self): + return self._rating + + @rating.setter + def rating(self, value): + self._rating = value + def export_state(self): """ Exports all information needed to re-create this competitor from scratch later on. @@ -30,12 +38,13 @@ def export_state(self): :return: dictionary of kwargs and class-args to re-instantiate this object """ return { - "initial_rating": self.rating, + "initial_rating": self._rating, "class_vars": { "_J": self._J } } + def expected_score(self, competitor: BaseCompetitor): """ The expected outcome of a match between this competitor and one passed in. 
Scaled between 0-1, where 1 is a strong @@ -46,15 +55,15 @@ def expected_score(self, competitor: BaseCompetitor): """ self.verify_competitor_types(competitor) - return 1 / (1 + 10 ** ((competitor.rating - self.rating) / 400 )) + return 1 / (1 + 10 ** ((competitor.rating - self._rating) / 400)) @property def _E(self): - E0 = (self.rating / 1000) ** 4 + self._J - a = max([0.5, min([self.rating / 2000, 1])]) + E0 = (self._rating / 1000) ** 4 + self._J + a = max([0.5, min([self._rating / 2000, 1])]) - if self.rating < 1300: - B = math.exp((1300 - self.rating) / 150) - 1 + if self._rating < 1300: + B = math.exp((1300 - self._rating) / 150) - 1 else: B = 0 @@ -65,7 +74,7 @@ def _E(self): return max([5, min([E, 150])]) def _new_rating(self, competitor, W_a): - return self.rating + (800 / (self._E + self._count)) * (W_a - self.expected_score(competitor)) + return self._rating + (800 / (self._E + self._count)) * (W_a - self.expected_score(competitor)) def beat(self, competitor: BaseCompetitor): """ @@ -80,7 +89,7 @@ def beat(self, competitor: BaseCompetitor): self_rating = self._new_rating(competitor, 1) competitor_rating = competitor._new_rating(self, 0) - self.rating = self_rating + self._rating = self_rating self._count += 1 competitor.rating = competitor_rating @@ -98,7 +107,7 @@ def tied(self, competitor: BaseCompetitor): self_rating = self._new_rating(competitor, 0.5) competitor_rating = competitor._new_rating(self, 0.5) - self.rating = self_rating + self._rating = self_rating self._count += 1 competitor.rating = competitor_rating diff --git a/elote/competitors/elo.py b/elote/competitors/elo.py index 41d6d46..110de42 100644 --- a/elote/competitors/elo.py +++ b/elote/competitors/elo.py @@ -43,7 +43,7 @@ def __init__(self, initial_rating: float = 400): :param initial_rating: the initial rating to use for a new competitor who has no history. 
Default 400 :type initial_rating: int """ - self.rating = initial_rating + self._rating = initial_rating def __repr__(self): return '' % (self.__hash__()) @@ -58,7 +58,7 @@ def export_state(self): :return: dictionary of kwargs and class-args to re-instantiate this object """ return { - "initial_rating": self.rating, + "initial_rating": self._rating, "class_vars": { "_k_factor": self._k_factor, "_base_rating": self._base_rating @@ -67,7 +67,15 @@ def export_state(self): @property def transformed_rating(self): - return 10 ** (self.rating / self._base_rating) + return 10 ** (self._rating / self._base_rating) + + @property + def rating(self): + return self._rating + + @rating.setter + def rating(self, value): + self._rating = value def expected_score(self, competitor: BaseCompetitor): """ @@ -97,7 +105,7 @@ def beat(self, competitor: BaseCompetitor): lose_es = competitor.expected_score(self) # update the winner's rating - self.rating = self.rating + self._k_factor * (1 - win_es) + self._rating = self._rating + self._k_factor * (1 - win_es) # update the loser's rating competitor.rating = competitor.rating + self._k_factor * (0 - lose_es) @@ -116,7 +124,7 @@ def tied(self, competitor: BaseCompetitor): lose_es = competitor.expected_score(self) # update the winner's rating - self.rating = self.rating + self._k_factor * (0.5 - win_es) + self._rating = self._rating + self._k_factor * (0.5 - win_es) # update the loser's rating competitor.rating = competitor.rating + self._k_factor * (0.5 - lose_es) diff --git a/elote/competitors/ensemble.py b/elote/competitors/ensemble.py index 4ffe192..43085c6 100644 --- a/elote/competitors/ensemble.py +++ b/elote/competitors/ensemble.py @@ -16,11 +16,11 @@ def __init__(self, competitors: list, blend_mode: str = "mean"): :param competitors: :param blend_mode: """ - self._sub_competitors = [] + self.sub_competitors = [] for competitor in competitors: comp_type = competitor_types.get(competitor.get('type', 'EloCompetitor')) comp_kwargs = 
competitor.get('competitor_kwargs', {}) - self._sub_competitors.append(comp_type(**comp_kwargs)) + self.sub_competitors.append(comp_type(**comp_kwargs)) self.blend_mode = blend_mode @@ -30,6 +30,10 @@ def __repr__(self): def __str__(self): return '' + @property + def rating(self): + return sum([x.rating for x in self.sub_competitors]) + def export_state(self): """ Exports all information needed to re-create this competitor from scratch later on. @@ -43,7 +47,7 @@ def export_state(self): "type": x.__name__, "competitor_kwargs": x.export_state() } - for x in self._sub_competitors + for x in self.sub_competitors ] } @@ -59,7 +63,10 @@ def expected_score(self, competitor: BaseCompetitor): self.verify_competitor_types(competitor) if self.blend_mode == 'mean': - return sum([x.expected_score(competitor) for x in self._sub_competitors]) / len(self._sub_competitors) + es = list() + for c, other_c in zip(self.sub_competitors, competitor.sub_competitors): + es.append(c.expected_score(other_c)) + return sum(es) / len(es) else: raise NotImplementedError('Blend mode %s not supported' % (self.blend_mode, )) @@ -73,8 +80,8 @@ def beat(self, competitor: BaseCompetitor): self.verify_competitor_types(competitor) - for c in self._sub_competitors: - c.beat(competitor) + for c, other_c in zip(self.sub_competitors, competitor.sub_competitors): + c.beat(other_c) def tied(self, competitor: BaseCompetitor): """ @@ -86,5 +93,5 @@ def tied(self, competitor: BaseCompetitor): self.verify_competitor_types(competitor) - for c in self._sub_competitors: - c.tied(competitor) + for c, other_c in zip(self.sub_competitors, competitor.sub_competitors): + c.tied(other_c) diff --git a/elote/competitors/glicko.py b/elote/competitors/glicko.py index 3100bd2..8a147e9 100644 --- a/elote/competitors/glicko.py +++ b/elote/competitors/glicko.py @@ -17,7 +17,7 @@ class vars: :param initial_rating: the initial rating to use for a new competitor who has no history. 
Default 1500 :param initial_rd: initial value of rd to use for new competitors with no history. Default 350 """ - self.rating = initial_rating + self._rating = initial_rating self.rd = initial_rd def __repr__(self): @@ -33,7 +33,7 @@ def export_state(self): :return: dictionary of kwargs and class-args to re-instantiate this object """ return { - "initial_rating": self.rating, + "initial_rating": self._rating, "initial_rd": self.rd, "class_vars": { "_c": self._c, @@ -41,6 +41,14 @@ def export_state(self): } } + @property + def rating(self): + return self._rating + + @rating.setter + def rating(self, value): + self._rating = value + @property def tranformed_rd(self): return min([350, math.sqrt(self.rd ** 2 + self._c ** 2)]) @@ -61,7 +69,7 @@ def expected_score(self, competitor: BaseCompetitor): self.verify_competitor_types(competitor) g_term = self._g(self.rd ** 2) - E = 1 / (1 + 10 ** ((-1 * g_term * (self.rating - competitor.rating))/400)) + E = 1 / (1 + 10 ** ((-1 * g_term * (self._rating - competitor.rating)) / 400)) return E def beat(self, competitor: BaseCompetitor): @@ -78,7 +86,7 @@ def beat(self, competitor: BaseCompetitor): s = 1 E_term = self.expected_score(competitor) d_squared = (self._q ** 2 * (self._g(competitor.rd) ** 2 * E_term * (1 - E_term))) ** -1 - s_new_r = self.rating + (self._q / (1 / self.rd ** 2 + 1 / d_squared)) * self._g(competitor.rd) * (s - E_term) + s_new_r = self._rating + (self._q / (1 / self.rd ** 2 + 1 / d_squared)) * self._g(competitor.rd) * (s - E_term) s_new_rd = math.sqrt((1 / self.rd ** 2 + 1 / d_squared) ** -1) # then the competitor @@ -90,7 +98,7 @@ def beat(self, competitor: BaseCompetitor): c_new_rd = math.sqrt((1 / competitor.rd ** 2 + 1 / d_squared) ** -1) # assign everything - self.rating = s_new_r + self._rating = s_new_r self.rd = s_new_rd competitor.rating = c_new_r competitor.rd = c_new_rd @@ -109,7 +117,7 @@ def tied(self, competitor: BaseCompetitor): s = 0.5 E_term = self.expected_score(competitor) d_squared = 
(self._q ** 2 * (self._g(competitor.rd) ** 2 * E_term * (1 - E_term))) ** -1 - s_new_r = self.rating + (self._q / (1 / self.rd ** 2 + 1 / d_squared)) * self._g(competitor.rd) * (s - E_term) + s_new_r = self._rating + (self._q / (1 / self.rd ** 2 + 1 / d_squared)) * self._g(competitor.rd) * (s - E_term) s_new_rd = math.sqrt((1 / self.rd ** 2 + 1 / d_squared) ** -1) # then the competitor @@ -121,7 +129,7 @@ def tied(self, competitor: BaseCompetitor): c_new_rd = math.sqrt((1 / competitor.rd ** 2 + 1 / d_squared) ** -1) # assign everything - self.rating = s_new_r + self._rating = s_new_r self.rd = s_new_rd competitor.rating = c_new_r competitor.rd = c_new_rd diff --git a/tests/test_BlendedCompetitor.py b/tests/test_BlendedCompetitor.py new file mode 100644 index 0000000..cb48027 --- /dev/null +++ b/tests/test_BlendedCompetitor.py @@ -0,0 +1,72 @@ +import unittest +from elote import BlendedCompetitor, GlickoCompetitor +from elote.competitors.base import MissMatchedCompetitorTypesException + + +class TestBlendedCompetitor(unittest.TestCase): + def test_Improvement(self): + player1 = BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + initial_rating = player1.rating + # if player1 beats someone with a high rating, their rating should go up. 
+ for _ in range(10): + player2 = BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {"initial_rating": 1000}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + player1.beat(player2) + self.assertGreater(player1.rating, initial_rating) + initial_rating = player1.rating + + def test_Decay(self): + player1 = BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + initial_rating = player1.rating + # if player1 loses to someone with a high rating, their rating should go down. + for _ in range(10): + player2 = BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {"initial_rating": 1000}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + player2.beat(player1) + self.assertLess(player1.rating, initial_rating) + initial_rating = player1.rating + + def test_Expectation(self): + player1 = BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {"initial_rating": 1000}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + player2 = BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {"initial_rating": 100}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + self.assertGreater(player1.expected_score(player2), player2.expected_score(player1)) + + def test_Exceptions(self): + player1 = 
BlendedCompetitor(competitors=[ + {"type": "EloCompetitor", "competitor_kwargs": {"initial_rating": 1000}}, + {"type": "GlickoCompetitor", "competitor_kwargs": {}}, + {"type": "DWZCompetitor", "competitor_kwargs": {}}, + {"type": "ECFCompetitor", "competitor_kwargs": {}} + ]) + player2 = GlickoCompetitor(initial_rating=100) + + with self.assertRaises(MissMatchedCompetitorTypesException): + player1.verify_competitor_types(player2) \ No newline at end of file