Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scoring tests and fixes #131

Merged
merged 8 commits into from
May 26, 2021
109 changes: 66 additions & 43 deletions adapt/intent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

__author__ = 'seanfitz'

import itertools

CLIENT_ENTITY_NAME = 'Client'


Expand All @@ -30,21 +32,24 @@ def find_first_tag(tags, entity_type, after_index=-1):
"""Searches tags for entity type after given index

Args:
tags(list): a list of tags with entity types to be compaired too entity_type
tags(list): a list of tags with entity types to be compared to
entity_type
entity_type(str): This is he entity type to be looking for in tags
after_index(int): the start token must be greaterthan this.
after_index(int): the start token must be greater than this.

Returns:
( tag, v, confidence ):
tag(str): is the tag that matched
v(str): ? the word that matched?
confidence(float): is a mesure of accuacy. 1 is full confidence and 0 is none.
confidence(float): is a measure of accuracy. 1 is full confidence
and 0 is none.
"""
for tag in tags:
for entity in tag.get('entities'):
for v, t in entity.get('data'):
if t.lower() == entity_type.lower() and \
(tag.get('start_token', 0) > after_index or tag.get('from_context', False)):
(tag.get('start_token', 0) > after_index or \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the \ is technically not needed when in parenthesis like this

tag.get('from_context', False)):
return tag, v, entity.get('confidence')

return None, None, None
Expand All @@ -58,52 +63,53 @@ def find_next_tag(tags, end_index=0):


def choose_1_from_each(lists):
"""Takes a list of lists and returns a list of lists with one item
from each list. This new list should be the length of each list multiplied
by the others. 18 for an list with lists of 3, 2 and 3. Also the lenght
of each sub list should be same as the length of lists passed in.
"""
The original implementation here was functionally equivalent to
:func:`~itertools.product`, except that the former returns a generator
of lists, and itertools returns a generator of tuples. This is going to do
a light transform for now, until callers can be verified to work with
tuples.

Args:
lists(list of Lists): A list of lists
A list of lists or tuples, expected as input to
:func:`~itertools.product`

Returns:
list of lists: returns a list of lists constructions of one item from each
list in lists.
a generator of lists, see docs on :func:`~itertools.product`
"""
if len(lists) == 0:
yield []
else:
for el in lists[0]:
for next_list in choose_1_from_each(lists[1:]):
yield [el] + next_list
for result in itertools.product(*lists):
yield list(result)


def resolve_one_of(tags, at_least_one):
"""This searches tags for Entities in at_least_one and returns any match
"""Search through all combinations of at_least_one rules to find a
combination that is covered by tags

Args:
tags(list): List of tags with Entities to search for Entities
at_least_one(list): List of Entities to find in tags

Returns:
object: returns None if no match is found but returns any match as an object
object:
returns None if no match is found but returns any match as an object
"""
if len(tags) < len(at_least_one):
return None

for possible_resolution in choose_1_from_each(at_least_one):
resolution = {}
pr = possible_resolution[:]
for entity_type in pr:
last_end_index = -1
if entity_type in resolution:
last_end_index = resolution[entity_type][-1].get('end_token')
tag, value, c = find_first_tag(tags, entity_type, after_index=last_end_index)
tag, value, c = find_first_tag(tags, entity_type,
after_index=last_end_index)
if not tag:
break
else:
if entity_type not in resolution:
resolution[entity_type] = []
resolution[entity_type].append(tag)
# Check if this is a valid resolution (all one_of rules matched)
if len(resolution) == len(possible_resolution):
return resolution

Expand All @@ -129,23 +135,24 @@ def validate(self, tags, confidence):
"""Using this method removes tags from the result of validate_with_tags

Returns:
intent(intent): Resuts from validate_with_tags
intent(intent): Results from validate_with_tags
"""
intent, tags = self.validate_with_tags(tags, confidence)
return intent

def validate_with_tags(self, tags, parse_weight):
def validate_with_tags(self, tags, confidence):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's unfortunate, but this actually constitutes a breaking change (specifically named arguments; caught it when merging some new tests). I have kept all the previous fixes where we were overriding confidence or using the wrong one, and we have scoring tests to validate that.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth keeping the compatibility with something like

    def validate_with_tags(self, tags, confidence=None, parse_weight=None):
        if parse_weight:
            print("parse_weight is deprecated, use confidence instead")
        confidence = confidence or parse_weight
        if confidence is None:
            raise TypeError('Missing required argument confidence')

Probably not...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so; the parser is so lightweight that I'm actually concerned about the perf of something like that. I suppose we could just emit on first use? But I don't think the changes from the PR that introduced parse_weight have made it too broadly into the world yet, so its uses should be minimal.

"""Validate whether tags has required entites for this intent to fire

Args:
tags(list): Tags and Entities used for validation
parse_weight(float): The weight associate to the parse result,
confidence(float): The weight associate to the parse result,
as indicated by the parser. This is influenced by a parser
that uses edit distance or context.

Returns:
intent, tags: Returns intent and tags used by the intent on
falure to meat required entities then returns intent with confidence
failure to meat required entities then returns intent with
confidence
of 0.0 and an empty list for tags.
"""
result = {'intent_type': self.name}
Expand All @@ -154,7 +161,8 @@ def validate_with_tags(self, tags, parse_weight):
used_tags = []

for require_type, attribute_name in self.requires:
required_tag, canonical_form, tag_confidence = find_first_tag(local_tags, require_type)
required_tag, canonical_form, tag_confidence = \
find_first_tag(local_tags, require_type)
if not required_tag:
result['confidence'] = 0.0
return result, []
Expand All @@ -166,20 +174,24 @@ def validate_with_tags(self, tags, parse_weight):
intent_confidence += tag_confidence

if len(self.at_least_one) > 0:
best_resolution = resolve_one_of(tags, self.at_least_one)
best_resolution = resolve_one_of(local_tags, self.at_least_one)
if not best_resolution:
result['confidence'] = 0.0
return result, []
else:
for key in best_resolution:
result[key] = best_resolution[key][0].get('key') # TODO: at least one must support aliases
intent_confidence += 1.0 * best_resolution[key][0]['entities'][0].get('confidence', 1.0)
used_tags.append(best_resolution)
# TODO: at least one should support aliases
result[key] = best_resolution[key][0].get('key')
intent_confidence += \
1.0 * best_resolution[key][0]['entities'][0]\
.get('confidence', 1.0)
used_tags.append(best_resolution[key][0])
if best_resolution in local_tags:
local_tags.remove(best_resolution)
local_tags.remove(best_resolution[key][0])

for optional_type, attribute_name in self.optional:
optional_tag, canonical_form, tag_confidence = find_first_tag(local_tags, optional_type)
optional_tag, canonical_form, tag_confidence = \
find_first_tag(local_tags, optional_type)
if not optional_tag or attribute_name in result:
continue
result[attribute_name] = canonical_form
Expand All @@ -188,9 +200,11 @@ def validate_with_tags(self, tags, parse_weight):
used_tags.append(optional_tag)
intent_confidence += tag_confidence

total_confidence = (intent_confidence / len(tags) * parse_weight) if tags else 0.0
total_confidence = (intent_confidence / len(tags) * confidence) \
if tags else 0.0

target_client, canonical_form, parse_weight = find_first_tag(local_tags, CLIENT_ENTITY_NAME)
target_client, canonical_form, confidence = \
find_first_tag(local_tags, CLIENT_ENTITY_NAME)

result['target'] = target_client.get('key') if target_client else None
result['confidence'] = total_confidence
Expand All @@ -204,7 +218,7 @@ class IntentBuilder(object):

Attributes:
at_least_one(list): A list of Entities where one is required.
These are seperated into lists so you can have one of (A or B) and
These are separated into lists so you can have one of (A or B) and
then require one of (D or F).
requires(list): A list of Required Entities
optional(list): A list of optional Entities
Expand All @@ -214,14 +228,18 @@ class IntentBuilder(object):
This is designed to allow construction of intents in one line.

Example:
IntentBuilder("Intent").requires("A").one_of("C","D").optional("G").build()
IntentBuilder("Intent")\
.requires("A")\
.one_of("C","D")\
.optional("G").build()
"""
def __init__(self, intent_name):
"""
Constructor

Args:
intent_name(str): the name of the intents that this parser parses/validates
intent_name(str): the name of the intents that this parser
parses/validates
"""
self.at_least_one = []
self.requires = []
Expand All @@ -230,7 +248,8 @@ def __init__(self, intent_name):

def one_of(self, *args):
"""
The intent parser should require one of the provided entity types to validate this clause.
The intent parser should require one of the provided entity types to
validate this clause.

Args:
args(args): *args notation list of entity names
Expand All @@ -247,7 +266,8 @@ def require(self, entity_type, attribute_name=None):

Args:
entity_type(str): an entity type
attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type.
attribute_name(str): the name of the attribute on the parsed intent.
Defaults to match entity_type.

Returns:
self: to continue modifications.
Expand All @@ -259,11 +279,13 @@ def require(self, entity_type, attribute_name=None):

def optionally(self, entity_type, attribute_name=None):
"""
Parsed intents from this parser can optionally include an entity of the provided type.
Parsed intents from this parser can optionally include an entity of the
provided type.

Args:
entity_type(str): an entity type
attribute_name(str): the name of the attribute on the parsed intent. Defaults to match entity_type.
attribute_name(str): the name of the attribute on the parsed intent.
Defaults to match entity_type.

Returns:
self: to continue modifications.
Expand All @@ -279,4 +301,5 @@ def build(self):

:return: an Intent instance.
"""
return Intent(self.name, self.requires, self.at_least_one, self.optional)
return Intent(self.name, self.requires,
self.at_least_one, self.optional)
Loading