diff --git a/adapt/intent.py b/adapt/intent.py index fae865b..8abb662 100644 --- a/adapt/intent.py +++ b/adapt/intent.py @@ -15,6 +15,8 @@ __author__ = 'seanfitz' +import itertools + CLIENT_ENTITY_NAME = 'Client' @@ -30,21 +32,24 @@ def find_first_tag(tags, entity_type, after_index=-1): """Searches tags for entity type after given index Args: - tags(list): a list of tags with entity types to be compaired too entity_type + tags(list): a list of tags with entity types to be compared to + entity_type entity_type(str): This is he entity type to be looking for in tags - after_index(int): the start token must be greaterthan this. + after_index(int): the start token must be greater than this. Returns: ( tag, v, confidence ): tag(str): is the tag that matched v(str): ? the word that matched? - confidence(float): is a mesure of accuacy. 1 is full confidence and 0 is none. + confidence(float): is a measure of accuracy. 1 is full confidence + and 0 is none. """ for tag in tags: for entity in tag.get('entities'): for v, t in entity.get('data'): if t.lower() == entity_type.lower() and \ - (tag.get('start_token', 0) > after_index or tag.get('from_context', False)): + (tag.get('start_token', 0) > after_index or \ + tag.get('from_context', False)): return tag, v, entity.get('confidence') return None, None, None @@ -58,38 +63,37 @@ def find_next_tag(tags, end_index=0): def choose_1_from_each(lists): - """Takes a list of lists and returns a list of lists with one item - from each list. This new list should be the length of each list multiplied - by the others. 18 for an list with lists of 3, 2 and 3. Also the lenght - of each sub list should be same as the length of lists passed in. + """ + The original implementation here was functionally equivalent to + :func:`~itertools.product`, except that the former returns a generator + of lists, and itertools returns a generator of tuples. This is going to do + a light transform for now, until callers can be verified to work with + tuples. Args: - lists(list of Lists): A list of lists + A list of lists or tuples, expected as input to + :func:`~itertools.product` Returns: - list of lists: returns a list of lists constructions of one item from each - list in lists. + a generator of lists, see docs on :func:`~itertools.product` """ - if len(lists) == 0: - yield [] - else: - for el in lists[0]: - for next_list in choose_1_from_each(lists[1:]): - yield [el] + next_list + for result in itertools.product(*lists): + yield list(result) def resolve_one_of(tags, at_least_one): - """This searches tags for Entities in at_least_one and returns any match + """Search through all combinations of at_least_one rules to find a + combination that is covered by tags Args: tags(list): List of tags with Entities to search for Entities at_least_one(list): List of Entities to find in tags Returns: - object: returns None if no match is found but returns any match as an object + object: + returns None if no match is found but returns any match as an object """ - if len(tags) < len(at_least_one): - return None + for possible_resolution in choose_1_from_each(at_least_one): resolution = {} pr = possible_resolution[:] @@ -97,13 +101,15 @@ def resolve_one_of(tags, at_least_one): last_end_index = -1 if entity_type in resolution: last_end_index = resolution[entity_type][-1].get('end_token') - tag, value, c = find_first_tag(tags, entity_type, after_index=last_end_index) + tag, value, c = find_first_tag(tags, entity_type, + after_index=last_end_index) if not tag: break else: if entity_type not in resolution: resolution[entity_type] = [] resolution[entity_type].append(tag) + # Check if this is a valid resolution (all one_of rules matched) if len(resolution) == len(possible_resolution): return resolution @@ -129,23 +135,24 @@ def validate(self, tags, confidence): """Using this method removes tags from the result of validate_with_tags Returns: - intent(intent): Resuts from validate_with_tags + intent(intent): Results from validate_with_tags """ intent, tags = self.validate_with_tags(tags, confidence) return intent - def validate_with_tags(self, tags, parse_weight): + def validate_with_tags(self, tags, confidence): """Validate whether tags has required entites for this intent to fire Args: tags(list): Tags and Entities used for validation - parse_weight(float): The weight associate to the parse result, + confidence(float): The weight associate to the parse result, as indicated by the parser. This is influenced by a parser that uses edit distance or context. Returns: intent, tags: Returns intent and tags used by the intent on - falure to meat required entities then returns intent with confidence + failure to meat required entities then returns intent with + confidence of 0.0 and an empty list for tags. """ result = {'intent_type': self.name} @@ -154,7 +161,8 @@ def validate_with_tags(self, tags, parse_weight): used_tags = [] for require_type, attribute_name in self.requires: - required_tag, canonical_form, tag_confidence = find_first_tag(local_tags, require_type) + required_tag, canonical_form, tag_confidence = \ + find_first_tag(local_tags, require_type) if not required_tag: result['confidence'] = 0.0 return result, [] @@ -166,20 +174,24 @@ def validate_with_tags(self, tags, parse_weight): intent_confidence += tag_confidence if len(self.at_least_one) > 0: - best_resolution = resolve_one_of(tags, self.at_least_one) + best_resolution = resolve_one_of(local_tags, self.at_least_one) if not best_resolution: result['confidence'] = 0.0 return result, [] else: for key in best_resolution: - result[key] = best_resolution[key][0].get('key') # TODO: at least one must support aliases - intent_confidence += 1.0 * best_resolution[key][0]['entities'][0].get('confidence', 1.0) - used_tags.append(best_resolution) + # TODO: at least one should support aliases + result[key] = best_resolution[key][0].get('key') + intent_confidence += \ + 1.0 * best_resolution[key][0]['entities'][0]\ + .get('confidence', 1.0) + used_tags.append(best_resolution[key][0]) if best_resolution in local_tags: - local_tags.remove(best_resolution) + local_tags.remove(best_resolution[key][0]) for optional_type, attribute_name in self.optional: - optional_tag, canonical_form, tag_confidence = find_first_tag(local_tags, optional_type) + optional_tag, canonical_form, tag_confidence = \ + find_first_tag(local_tags, optional_type) if not optional_tag or attribute_name in result: continue result[attribute_name] = canonical_form @@ -188,9 +200,11 @@ def validate_with_tags(self, tags, parse_weight): used_tags.append(optional_tag) intent_confidence += tag_confidence - total_confidence = (intent_confidence / len(tags) * parse_weight) if tags else 0.0 + total_confidence = (intent_confidence / len(tags) * confidence) \ + if tags else 0.0 - target_client, canonical_form, parse_weight = find_first_tag(local_tags, CLIENT_ENTITY_NAME) + target_client, canonical_form, confidence = \ + find_first_tag(local_tags, CLIENT_ENTITY_NAME) result['target'] = target_client.get('key') if target_client else None result['confidence'] = total_confidence @@ -204,7 +218,7 @@ class IntentBuilder(object): Attributes: at_least_one(list): A list of Entities where one is required. - These are seperated into lists so you can have one of (A or B) and + These are separated into lists so you can have one of (A or B) and then require one of (D or F). requires(list): A list of Required Entities optional(list): A list of optional Entities @@ -214,14 +228,18 @@ class IntentBuilder(object): This is designed to allow construction of intents in one line. Example: - IntentBuilder("Intent").requires("A").one_of("C","D").optional("G").build() + IntentBuilder("Intent")\ + .requires("A")\ + .one_of("C","D")\ + .optional("G").build() """ def __init__(self, intent_name): """ Constructor Args: - intent_name(str): the name of the intents that this parser parses/validates + intent_name(str): the name of the intents that this parser + parses/validates """ self.at_least_one = [] self.requires = [] @@ -230,7 +248,8 @@ def __init__(self, intent_name): def one_of(self, *args): """ - The intent parser should require one of the provided entity types to validate this clause. + The intent parser should require one of the provided entity types to + validate this clause. Args: args(args): *args notation list of entity names @@ -247,7 +266,8 @@ def require(self, entity_type, attribute_name=None): Args: entity_type(str): an entity type - attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type. + attribute_name(str): the name of the attribute on the parsed intent. + Defaults to match entity_type. Returns: self: to continue modifications. @@ -259,11 +279,13 @@ def require(self, entity_type, attribute_name=None): def optionally(self, entity_type, attribute_name=None): """ - Parsed intents from this parser can optionally include an entity of the provided type. + Parsed intents from this parser can optionally include an entity of the + provided type. Args: entity_type(str): an entity type - attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type. + attribute_name(str): the name of the attribute on the parsed intent. + Defaults to match entity_type. Returns: self: to continue modifications. @@ -279,4 +301,5 @@ def build(self): :return: an Intent instance. """ - return Intent(self.name, self.requires, self.at_least_one, self.optional) + return Intent(self.name, self.requires, + self.at_least_one, self.optional) diff --git a/setup.py b/setup.py index 0780745..36853d0 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def required(requirements_file): setup( name="adapt-parser", - version="0.4.0", + version="0.4.1", author="Sean Fitzgerald", author_email="sean@fitzgeralds.me", description=("A text-to-intent parsing framework."), diff --git a/test/IntentTest.py b/test/IntentTest.py index 37431ea..0476b02 100644 --- a/test/IntentTest.py +++ b/test/IntentTest.py @@ -17,7 +17,7 @@ import unittest from adapt.parser import Parser from adapt.entity_tagger import EntityTagger -from adapt.intent import IntentBuilder, resolve_one_of +from adapt.intent import IntentBuilder, resolve_one_of, choose_1_from_each from adapt.tools.text.tokenizer import EnglishTokenizer from adapt.tools.text.trie import Trie @@ -30,12 +30,15 @@ def setUp(self): self.trie = Trie() self.tokenizer = EnglishTokenizer() self.regex_entities = [] - self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities) + self.tagger = EntityTagger(self.trie, self.tokenizer, + regex_entities=self.regex_entities) self.trie.insert("play", ("play", "PlayVerb")) self.trie.insert("stop", ("stop", "StopVerb")) - self.trie.insert("the big bang theory", ("the big bang theory", "Television Show")) + self.trie.insert("the big bang theory", + ("the big bang theory", "Television Show")) self.trie.insert("the big", ("the big", "Not a Thing")) - self.trie.insert("barenaked ladies", ("barenaked ladies", "Radio Station")) + self.trie.insert("barenaked ladies", + ("barenaked ladies", "Radio Station")) self.trie.insert("show", ("show", "Command")) self.trie.insert("what", ("what", "Question")) self.parser = Parser(self.tokenizer, self.tagger) @@ -44,29 +47,32 @@ def tearDown(self): pass def test_basic_intent(self): - intent = IntentBuilder("play television intent")\ - .require("PlayVerb")\ - .require("Television Show")\ + intent = IntentBuilder("play television intent") \ + .require("PlayVerb") \ + .require("Television Show") \ .build() for result in self.parser.parse("play the big bang theory"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('PlayVerb') == 'play' assert result_intent.get('Television Show') == "the big bang theory" def test_at_least_one(self): - intent = IntentBuilder("play intent")\ - .require("PlayVerb")\ - .one_of("Television Show", "Radio Station")\ + intent = IntentBuilder("play intent") \ + .require("PlayVerb") \ + .one_of("Television Show", "Radio Station") \ .build() for result in self.parser.parse("play the big bang theory"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('PlayVerb') == 'play' assert result_intent.get('Television Show') == "the big bang theory" for result in self.parser.parse("play the barenaked ladies"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('PlayVerb') == 'play' assert result_intent.get('Radio Station') == "barenaked ladies" @@ -76,14 +82,16 @@ def test_at_least_one_with_tag_in_multiple_slots(self): self.trie.insert("living room", ("living room", "living room")) self.trie.insert("what is", ("what is", "what is")) - intent = IntentBuilder("test intent")\ - .one_of("what is")\ - .one_of("temperature", "living room")\ - .one_of("temperature")\ + intent = IntentBuilder("test intent") \ + .one_of("what is") \ + .one_of("temperature", "living room") \ + .one_of("temperature") \ .build() - for result in self.parser.parse("what is the temperature in the living room"): - result_intent = intent.validate(result.get("tags"), result.get("confidence")) + for result in self.parser.parse( + "what is the temperature in the living room"): + result_intent = intent.validate(result.get("tags"), + result.get("confidence")) assert result_intent.get("confidence") > 0.0 assert result_intent.get("temperature") == "temperature" assert result_intent.get("living room") == "living room" @@ -94,12 +102,14 @@ def test_at_least_on_no_required(self): .one_of("Television Show", "Radio Station") \ .build() for result in self.parser.parse("play the big bang theory"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('Television Show') == "the big bang theory" for result in self.parser.parse("play the barenaked ladies"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('Radio Station') == "barenaked ladies" @@ -109,46 +119,51 @@ def test_at_least_one_alone(self): .build() for result in self.parser.parse("show"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('Command') == "show" def test_basic_intent_with_alternate_names(self): - intent = IntentBuilder("play television intent")\ - .require("PlayVerb", "Play Verb")\ - .require("Television Show", "series")\ + intent = IntentBuilder("play television intent") \ + .require("PlayVerb", "Play Verb") \ + .require("Television Show", "series") \ .build() for result in self.parser.parse("play the big bang theory"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('Play Verb') == 'play' assert result_intent.get('series') == "the big bang theory" def test_intent_with_regex_entity(self): self.trie = Trie() - self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities) + self.tagger = EntityTagger(self.trie, self.tokenizer, + self.regex_entities) self.parser = Parser(self.tokenizer, self.tagger) self.trie.insert("theory", ("theory", "Concept")) regex = re.compile(r"the (?P.*)") self.regex_entities.append(regex) - intent = IntentBuilder("mock intent")\ - .require("Event")\ + intent = IntentBuilder("mock intent") \ + .require("Event") \ .require("Concept").build() for result in self.parser.parse("the big bang theory"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('Event') == 'big bang' assert result_intent.get('Concept') == "theory" def test_intent_using_alias(self): self.trie.insert("big bang", ("the big bang theory", "Television Show")) - intent = IntentBuilder("play television intent")\ - .require("PlayVerb", "Play Verb")\ - .require("Television Show", "series")\ + intent = IntentBuilder("play television intent") \ + .require("PlayVerb", "Play Verb") \ + .require("Television Show", "series") \ .build() for result in self.parser.parse("play the big bang theory"): - result_intent = intent.validate(result.get('tags'), result.get('confidence')) + result_intent = intent.validate(result.get('tags'), + result.get('confidence')) assert result_intent.get('confidence') > 0.0 assert result_intent.get('Play Verb') == 'play' assert result_intent.get('series') == "the big bang theory" @@ -312,3 +327,175 @@ def test_resolve_one_of(self): } assert resolve_one_of(tags, at_least_one) == result + + +# noinspection PyPep8Naming +def TestTag(tag_name, + tag_value, + tag_confidence=1.0, + entity_confidence=1.0, + match=None): + """ + Create a dict in the shape of a tag as yielded from parser. + :param tag_name: tag name (equivalent to a label) + :param tag_value: tag value (value being labeled) + :param tag_confidence: confidence of parse of the tag, influenced by + fuzzy matching or context + :param entity_confidence: weight of the entity, influenced by + context + :param match: the text matched by the parser, which may not match tag_value + in the case of an alias or fuzzy matching. Defaults to tag_value. + + Uses "from_context" attribute to force token positioning to be ignored. + + :return: a dict that matches the shape of a parser tag + """ + return { + "confidence": tag_confidence, + "entities": [ + { + "confidence": entity_confidence, + "data": [ + [ + tag_value, + tag_name + ] + ], + "key": tag_value, + "match": match or tag_value + } + ], + "from_context": False, + "key": tag_value, + "match": match or tag_value, + "start_token": -1, + "end_token": -1, + "from_context": True + } + + +class IntentUtilityFunctionsTest(unittest.TestCase): + def test_choose_1_from_each_empty(self): + expected = [] + actual = list(choose_1_from_each([[]])) + self.assertListEqual(expected, actual) + + def test_choose_1_from_each_basic(self): + inputs = [ + ['A', 'B'], + ['C', 'D'] + ] + expected = [ + ['A', 'C'], + ['A', 'D'], + ['B', 'C'], + ['B', 'D'] + ] + actual = list(choose_1_from_each(inputs)) + self.assertListEqual(expected, actual) + + def test_choose_1_from_each_varying_sizes(self): + inputs = [ + ['A'], + ['B', 'C'], + ['D', 'E', 'F'] + ] + + expected = [ + ['A', 'B', 'D'], + ['A', 'B', 'E'], + ['A', 'B', 'F'], + ['A', 'C', 'D'], + ['A', 'C', 'E'], + ['A', 'C', 'F'], + ] + + actual = list(choose_1_from_each(inputs)) + self.assertListEqual(expected, actual) + + +class IntentScoringTest(unittest.TestCase): + def setUp(self): + self.require_intent = IntentBuilder('require_intent'). \ + require('required'). \ + build() + self.one_of_intent = IntentBuilder('one_of_intent'). \ + one_of('one_of_1', 'one_of_2'). \ + build() + self.optional_intent = IntentBuilder('optional_intent'). \ + optionally('optional'). \ + build() + self.all_features_intent = IntentBuilder('test_intent'). \ + require('required'). \ + one_of('one_of_1', 'one_of_2'). \ + optionally('optional'). \ + build() + + def test_basic_scoring_default_weights(self): + required = TestTag('required', 'foo') + one_of_1 = TestTag('one_of_1', 'bar') + optional = TestTag('optional', 'bing') + + intent, tags = \ + self.require_intent.validate_with_tags([required], + confidence=1.0) + self.assertEqual(1.0, intent.get('confidence')) + self.assertListEqual([required], tags) + + intent, tags = \ + self.one_of_intent.validate_with_tags([one_of_1], + confidence=1.0) + self.assertEqual(1.0, intent.get('confidence')) + self.assertListEqual([one_of_1], tags) + + intent, tags = \ + self.optional_intent.validate_with_tags([optional], + confidence=1.0) + self.assertEqual(1.0, intent.get('confidence')) + self.assertListEqual([optional], tags) + + def test_weighted_scoring_from_regex_entities(self): + required = TestTag('required', 'foo', entity_confidence=0.5) + one_of_1 = TestTag('one_of_1', 'bar', entity_confidence=0.5) + optional = TestTag('optional', 'bing', entity_confidence=0.5) + + intent, tags = \ + self.require_intent.validate_with_tags([required], + confidence=1.0) + self.assertEqual(0.5, intent.get('confidence')) + self.assertListEqual([required], tags) + + intent, tags = \ + self.one_of_intent.validate_with_tags([one_of_1], + confidence=1.0) + self.assertEqual(0.5, intent.get('confidence')) + self.assertListEqual([one_of_1], tags) + + intent, tags = \ + self.optional_intent.validate_with_tags([optional], + confidence=1.0) + self.assertEqual(0.5, intent.get('confidence')) + self.assertListEqual([optional], tags) + + def test_weighted_scoring_from_fuzzy_matching(self): + required = TestTag('required', 'foo') + one_of_1 = TestTag('one_of_1', 'bar') + optional = TestTag('optional', 'bing') + + intent, tags = \ + self.require_intent.validate_with_tags([required], + confidence=0.5) + self.assertEqual(0.5, intent.get('confidence')) + self.assertListEqual([required], tags) + + intent, tags = \ + self.one_of_intent.validate_with_tags([one_of_1], + confidence=0.5) + self.assertEqual(0.5, intent.get('confidence')) + self.assertListEqual([one_of_1], tags) + + intent, tags = \ + self.optional_intent.validate_with_tags([optional], + confidence=0.5) + self.assertEqual(0.5, intent.get('confidence')) + self.assertListEqual([optional], tags)