-
Notifications
You must be signed in to change notification settings - Fork 155
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Scoring tests and fixes #131
Changes from 7 commits
21ba0b0
2d25817
f9ad8ff
b213c9c
5485d7e
5674e62
73d2dbf
4447b8d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ | |
|
||
__author__ = 'seanfitz' | ||
|
||
import itertools | ||
|
||
CLIENT_ENTITY_NAME = 'Client' | ||
|
||
|
||
|
@@ -30,21 +32,24 @@ def find_first_tag(tags, entity_type, after_index=-1): | |
"""Searches tags for entity type after given index | ||
|
||
Args: | ||
tags(list): a list of tags with entity types to be compaired too entity_type | ||
tags(list): a list of tags with entity types to be compared to | ||
entity_type | ||
entity_type(str): This is he entity type to be looking for in tags | ||
after_index(int): the start token must be greaterthan this. | ||
after_index(int): the start token must be greater than this. | ||
|
||
Returns: | ||
( tag, v, confidence ): | ||
tag(str): is the tag that matched | ||
v(str): ? the word that matched? | ||
confidence(float): is a mesure of accuacy. 1 is full confidence and 0 is none. | ||
confidence(float): is a measure of accuracy. 1 is full confidence | ||
and 0 is none. | ||
""" | ||
for tag in tags: | ||
for entity in tag.get('entities'): | ||
for v, t in entity.get('data'): | ||
if t.lower() == entity_type.lower() and \ | ||
(tag.get('start_token', 0) > after_index or tag.get('from_context', False)): | ||
(tag.get('start_token', 0) > after_index or \ | ||
tag.get('from_context', False)): | ||
return tag, v, entity.get('confidence') | ||
|
||
return None, None, None | ||
|
@@ -58,52 +63,53 @@ def find_next_tag(tags, end_index=0): | |
|
||
|
||
def choose_1_from_each(lists): | ||
"""Takes a list of lists and returns a list of lists with one item | ||
from each list. This new list should be the length of each list multiplied | ||
by the others. 18 for an list with lists of 3, 2 and 3. Also the lenght | ||
of each sub list should be same as the length of lists passed in. | ||
""" | ||
The original implementation here was functionally equivalent to | ||
:func:`~itertools.product`, except that the former returns a generator | ||
of lists, and itertools returns a generator of tuples. This is going to do | ||
a light transform for now, until callers can be verified to work with | ||
tuples. | ||
|
||
Args: | ||
lists(list of Lists): A list of lists | ||
A list of lists or tuples, expected as input to | ||
:func:`~itertools.product` | ||
|
||
Returns: | ||
list of lists: returns a list of lists constructions of one item from each | ||
list in lists. | ||
a generator of lists, see docs on :func:`~itertools.product` | ||
""" | ||
if len(lists) == 0: | ||
yield [] | ||
else: | ||
for el in lists[0]: | ||
for next_list in choose_1_from_each(lists[1:]): | ||
yield [el] + next_list | ||
for result in itertools.product(*lists): | ||
yield list(result) | ||
|
||
|
||
def resolve_one_of(tags, at_least_one): | ||
"""This searches tags for Entities in at_least_one and returns any match | ||
"""Search through all combinations of at_least_one rules to find a | ||
combination that is covered by tags | ||
|
||
Args: | ||
tags(list): List of tags with Entities to search for Entities | ||
at_least_one(list): List of Entities to find in tags | ||
|
||
Returns: | ||
object: returns None if no match is found but returns any match as an object | ||
object: | ||
returns None if no match is found but returns any match as an object | ||
""" | ||
if len(tags) < len(at_least_one): | ||
return None | ||
|
||
for possible_resolution in choose_1_from_each(at_least_one): | ||
resolution = {} | ||
pr = possible_resolution[:] | ||
for entity_type in pr: | ||
last_end_index = -1 | ||
if entity_type in resolution: | ||
last_end_index = resolution[entity_type][-1].get('end_token') | ||
tag, value, c = find_first_tag(tags, entity_type, after_index=last_end_index) | ||
tag, value, c = find_first_tag(tags, entity_type, | ||
after_index=last_end_index) | ||
if not tag: | ||
break | ||
else: | ||
if entity_type not in resolution: | ||
resolution[entity_type] = [] | ||
resolution[entity_type].append(tag) | ||
# Check if this is a valid resolution (all one_of rules matched) | ||
if len(resolution) == len(possible_resolution): | ||
return resolution | ||
|
||
|
@@ -129,23 +135,24 @@ def validate(self, tags, confidence): | |
"""Using this method removes tags from the result of validate_with_tags | ||
|
||
Returns: | ||
intent(intent): Resuts from validate_with_tags | ||
intent(intent): Results from validate_with_tags | ||
""" | ||
intent, tags = self.validate_with_tags(tags, confidence) | ||
return intent | ||
|
||
def validate_with_tags(self, tags, parse_weight): | ||
def validate_with_tags(self, tags, confidence): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's unfortunate, but this actually constitutes a breaking change (specifically named arguments; caught it when merging some new tests). I have kept all the previous fixes where we were overriding confidence or using the wrong one, and we have scoring tests to validate that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth keeping the compatibility with something like def validate_with_tags(self, tags, confidence=None, parse_weight=None):
if parse_weight:
print("parse_weight is deprecated, use confidence instead")
confidence = confidence or parse_weight
if confidence is None:
raise TypeError('Missing required argument confidence') Probably not... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so; the parser is so lightweight that I'm actually concerned about the perf of something like that. I suppose we could just emit on first use? But I don't think the changes from the PR that introduced |
||
"""Validate whether tags has required entites for this intent to fire | ||
|
||
Args: | ||
tags(list): Tags and Entities used for validation | ||
parse_weight(float): The weight associate to the parse result, | ||
confidence(float): The weight associate to the parse result, | ||
as indicated by the parser. This is influenced by a parser | ||
that uses edit distance or context. | ||
|
||
Returns: | ||
intent, tags: Returns intent and tags used by the intent on | ||
falure to meat required entities then returns intent with confidence | ||
failure to meat required entities then returns intent with | ||
confidence | ||
of 0.0 and an empty list for tags. | ||
""" | ||
result = {'intent_type': self.name} | ||
|
@@ -154,7 +161,8 @@ def validate_with_tags(self, tags, parse_weight): | |
used_tags = [] | ||
|
||
for require_type, attribute_name in self.requires: | ||
required_tag, canonical_form, tag_confidence = find_first_tag(local_tags, require_type) | ||
required_tag, canonical_form, tag_confidence = \ | ||
find_first_tag(local_tags, require_type) | ||
if not required_tag: | ||
result['confidence'] = 0.0 | ||
return result, [] | ||
|
@@ -166,20 +174,24 @@ def validate_with_tags(self, tags, parse_weight): | |
intent_confidence += tag_confidence | ||
|
||
if len(self.at_least_one) > 0: | ||
best_resolution = resolve_one_of(tags, self.at_least_one) | ||
best_resolution = resolve_one_of(local_tags, self.at_least_one) | ||
if not best_resolution: | ||
result['confidence'] = 0.0 | ||
return result, [] | ||
else: | ||
for key in best_resolution: | ||
result[key] = best_resolution[key][0].get('key') # TODO: at least one must support aliases | ||
intent_confidence += 1.0 * best_resolution[key][0]['entities'][0].get('confidence', 1.0) | ||
used_tags.append(best_resolution) | ||
# TODO: at least one should support aliases | ||
result[key] = best_resolution[key][0].get('key') | ||
intent_confidence += \ | ||
1.0 * best_resolution[key][0]['entities'][0]\ | ||
.get('confidence', 1.0) | ||
used_tags.append(best_resolution[key][0]) | ||
if best_resolution in local_tags: | ||
local_tags.remove(best_resolution) | ||
local_tags.remove(best_resolution[key][0]) | ||
|
||
for optional_type, attribute_name in self.optional: | ||
optional_tag, canonical_form, tag_confidence = find_first_tag(local_tags, optional_type) | ||
optional_tag, canonical_form, tag_confidence = \ | ||
find_first_tag(local_tags, optional_type) | ||
if not optional_tag or attribute_name in result: | ||
continue | ||
result[attribute_name] = canonical_form | ||
|
@@ -188,9 +200,11 @@ def validate_with_tags(self, tags, parse_weight): | |
used_tags.append(optional_tag) | ||
intent_confidence += tag_confidence | ||
|
||
total_confidence = (intent_confidence / len(tags) * parse_weight) if tags else 0.0 | ||
total_confidence = (intent_confidence / len(tags) * confidence) \ | ||
if tags else 0.0 | ||
|
||
target_client, canonical_form, parse_weight = find_first_tag(local_tags, CLIENT_ENTITY_NAME) | ||
target_client, canonical_form, confidence = \ | ||
find_first_tag(local_tags, CLIENT_ENTITY_NAME) | ||
|
||
result['target'] = target_client.get('key') if target_client else None | ||
result['confidence'] = total_confidence | ||
|
@@ -204,7 +218,7 @@ class IntentBuilder(object): | |
|
||
Attributes: | ||
at_least_one(list): A list of Entities where one is required. | ||
These are seperated into lists so you can have one of (A or B) and | ||
These are separated into lists so you can have one of (A or B) and | ||
then require one of (D or F). | ||
requires(list): A list of Required Entities | ||
optional(list): A list of optional Entities | ||
|
@@ -214,14 +228,18 @@ class IntentBuilder(object): | |
This is designed to allow construction of intents in one line. | ||
|
||
Example: | ||
IntentBuilder("Intent").requires("A").one_of("C","D").optional("G").build() | ||
IntentBuilder("Intent")\ | ||
.requires("A")\ | ||
.one_of("C","D")\ | ||
.optional("G").build() | ||
""" | ||
def __init__(self, intent_name): | ||
""" | ||
Constructor | ||
|
||
Args: | ||
intent_name(str): the name of the intents that this parser parses/validates | ||
intent_name(str): the name of the intents that this parser | ||
parses/validates | ||
""" | ||
self.at_least_one = [] | ||
self.requires = [] | ||
|
@@ -230,7 +248,8 @@ def __init__(self, intent_name): | |
|
||
def one_of(self, *args): | ||
""" | ||
The intent parser should require one of the provided entity types to validate this clause. | ||
The intent parser should require one of the provided entity types to | ||
validate this clause. | ||
|
||
Args: | ||
args(args): *args notation list of entity names | ||
|
@@ -247,7 +266,8 @@ def require(self, entity_type, attribute_name=None): | |
|
||
Args: | ||
entity_type(str): an entity type | ||
attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type. | ||
attribute_name(str): the name of the attribute on the parsed intent. | ||
Defaults to match entity_type. | ||
|
||
Returns: | ||
self: to continue modifications. | ||
|
@@ -259,11 +279,13 @@ def require(self, entity_type, attribute_name=None): | |
|
||
def optionally(self, entity_type, attribute_name=None): | ||
""" | ||
Parsed intents from this parser can optionally include an entity of the provided type. | ||
Parsed intents from this parser can optionally include an entity of the | ||
provided type. | ||
|
||
Args: | ||
entity_type(str): an entity type | ||
attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type. | ||
attribute_name(str): the name of the attribute on the parsed intent. | ||
Defaults to match entity_type. | ||
|
||
Returns: | ||
self: to continue modifications. | ||
|
@@ -279,4 +301,5 @@ def build(self): | |
|
||
:return: an Intent instance. | ||
""" | ||
return Intent(self.name, self.requires, self.at_least_one, self.optional) | ||
return Intent(self.name, self.requires, | ||
self.at_least_one, self.optional) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the
\
is technically not needed when in parenthesis like this