Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new folder of updated e2e tests with assertions #46

Merged
merged 17 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 110 additions & 1 deletion .github/workflows/continous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
path: models/${{steps.upload_model.outputs.model}}.tar.gz

run_e2e_tests:
name: Run e2e Tests
name: Run e2e tests
runs-on: ubuntu-22.04
needs: [train-model]

Expand Down Expand Up @@ -206,3 +206,112 @@ jobs:
- name: Stop Duckling server
run: |
make stop-duckling

# CI job: runs the assertion-based e2e suites (passing, flaky, failing).
# Waits for the train-model job and downloads the `trained-model` artifact
# into models/ before starting the action and Duckling servers.
run_e2e_tests_with_assertions:
  name: Run e2e tests with assertions
  runs-on: ubuntu-22.04
  needs: [train-model]

  steps:
    - name: Checkout git repository 🕝
      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c

    - name: Setup Python
      uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
      with:
        python-version: ${{ env.DEFAULT_PYTHON_VERSION }}

    - name: Install poetry 🦄
      uses: Gr1N/setup-poetry@15821dc8a61bc630db542ae4baf6a7c19a994844
      with:
        poetry-version: ${{ env.POETRY_VERSION }}

    - name: Load Poetry Cached Libraries ⬇
      id: cache-poetry
      uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
      with:
        path: .venv
        key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ env.DEFAULT_PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
        # NOTE(review): this fallback omits POETRY_VERSION, so it does not look
        # like a prefix of `key` above and probably never matches — confirm.
        restore-keys: ${{ runner.os }}-poetry-${{ env.DEFAULT_PYTHON_VERSION }}

    - name: Create virtual environment
      if: steps.cache-poetry.outputs.cache-hit != 'true'
      # `venv` treats every positional argument as an environment directory,
      # so the former `venv create .venv` also built a stray `create/` env.
      run: python -m venv .venv

    - name: Set up virtual environment
      run: poetry config virtualenvs.in-project true

    # Authenticate with gcloud for release registry (where Rasa is published)
    - id: "auth-release"
      name: Authenticate with gcloud for release registry 🎫
      uses: "google-github-actions/auth@ef5d53e30bbcd8d0836f4288f5e50ff3e086997d"
      with:
        token_format: 'access_token'
        credentials_json: "${{ secrets.RASA_RELEASES_READ }}"

    - name: Configure OAuth token for poetry
      run: |
        poetry config http-basic.rasa-plus oauth2accesstoken $(gcloud auth print-access-token)

    - name: Install Dependencies 📦
      run: |
        make install

    - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a
      with:
        name: trained-model
        path: models/

    - name: Init LLM Cache
      id: cache-llm
      uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
      with:
        path: .rasa
        key: rasa-llm-cache

    # Started in the background so the following steps can talk to it.
    - name: Run action server
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_INTENTLESS: true
      run: |
        make actions &

    - name: Run duckling server
      run: |
        make run-duckling

    # The passing suite gates the job: any failure here fails the step.
    - name: Run e2e passing tests with assertions
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_E2E_ASSERTIONS: true
      run: |
        make test-passing-assertions

    # The flaky suite is informational only: `|| true` discards its exit code.
    - name: Run e2e flaky tests with assertions
      if: always()
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_E2E_ASSERTIONS: true
      run: |
        make test-flaky-assertions || true

    - name: Run e2e failing tests with assertions
      if: always()
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_E2E_ASSERTIONS: true
      run: |
        # The step succeeds only when the summary reports "0 passed". The
        # match is anchored so that counts such as "10 passed" or "20 passed"
        # are not mistaken for zero.
        make test-failing-assertions | grep -E '(^|[^0-9])0 passed'

    - name: Stop Duckling server
      run: |
        make stop-duckling

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,6 @@ models/
prompts/
tests/
qdrant_storage/

# mlflow
mlruns/
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,12 @@ test-one: .EXPORT_ALL_VARIABLES

stop-duckling:
docker stop duckling_container

# Run the assertion-based e2e tests stored under e2e_tests_with_assertions/passing.
test-passing-assertions: .EXPORT_ALL_VARIABLES
poetry run rasa test e2e e2e_tests_with_assertions/passing

# Run the assertion-based e2e tests stored under e2e_tests_with_assertions/flaky.
test-flaky-assertions: .EXPORT_ALL_VARIABLES
poetry run rasa test e2e e2e_tests_with_assertions/flaky

# Run the assertion-based e2e tests stored under e2e_tests_with_assertions/failing.
test-failing-assertions: .EXPORT_ALL_VARIABLES
poetry run rasa test e2e e2e_tests_with_assertions/failing
2 changes: 1 addition & 1 deletion actions/ask_for_slot_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def run(

if len(restaurant_names) > 0:
dispatcher.utter_message(
text="Do you know which restaurant you would like me to reverse a table at?",
text="Do you know which restaurant you would like me to reserve a table at?",
buttons=[
{"title": r, "payload": f'/inform{{"restaurant_name":"{r}"}}'}
for r in restaurant_names
Expand Down
9 changes: 5 additions & 4 deletions actions/setup_recurrent_payment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def parse_datetime(text: str) -> Optional[datetime]:
if isinstance(parsed_value, dict):
parsed_value = parsed_value["from"]

return datetime.fromisoformat(parsed_value)
result = datetime.fromisoformat(parsed_value)
return result.replace(tzinfo=None)


class ValidatePaymentStartDate(Action):
Expand All @@ -43,7 +44,7 @@ def run(
dispatcher.utter_message(response="utter_invalid_date")
return [SlotSet("recurrent_payment_start_date", None)]

return [SlotSet("recurrent_payment_start_date", start_date.isoformat())]
return [SlotSet("recurrent_payment_start_date", start_date.strftime("%Y-%m-%d"))]


class ValidatePaymentEndDate(Action):
Expand All @@ -66,11 +67,11 @@ def run(
return [SlotSet("recurrent_payment_end_date", None)]

start_date = tracker.get_slot("recurrent_payment_start_date")
if start_date is not None and end_date < datetime.fromisoformat(start_date):
if start_date is not None and end_date < datetime.strptime(start_date, "%Y-%m-%d"):
dispatcher.utter_message(response="utter_invalid_date")
return [SlotSet("recurrent_payment_end_date", None)]

return [SlotSet("recurrent_payment_end_date", end_date.isoformat())]
return [SlotSet("recurrent_payment_end_date", end_date.strftime("%Y-%m-%d"))]


class ExecutePayment(Action):
Expand Down
2 changes: 1 addition & 1 deletion config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pipeline:
- name: NLUCommandAdapter
- name: SingleStepLLMCommandGenerator
llm:
model_name: gpt-4
model: gpt-4
request_timeout: 7
temperature: 0.0
top_p: 0.0
Expand Down
2 changes: 1 addition & 1 deletion domain/flows/check_portfolio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ responses:
title: stocks
- payload: bonds
title: bonds
- payload: mutual_funds
- payload: /SetSlots(portfolio_type=mutual_funds)
title: mutual funds
utter_portfolio_options_found:
- text: "Your {portfolio_type} portfolio: {portfolio_options}"
Expand Down
2 changes: 1 addition & 1 deletion e2e_tests/passing/happy_path/user_checks_portfolio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ test_cases:
- user: "1234"
- utter: utter_authentication_successful
- utter: utter_ask_portfolio_type
- user: mutual funds
- user: /SetSlots(portfolio_type=mutual_funds)
- slot_was_set:
- portfolio_type: mutual_funds
- portfolio_exists: True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Money transfer in which the user first answers the recipient question with
# an abstract value ("a good friend") and only then with a concrete name.
test_cases:
  - test_case: user tries to use abstract values
    steps:
      - user: "send money"
        assertions:
          - bot_uttered:
              utter_name: utter_ask_transfer_money_recipient
      # The LLM is extracting "good friend" as the recipient.
      - user: "to a good friend"
        assertions:
          - slot_was_not_set:
              - name: transfer_money_recipient
          - bot_uttered:
              utter_name: utter_ask_transfer_money_recipient
      - user: "okay, to Mary"
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: "Mary"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: "50"
        assertions:
          - slot_was_set:
              - name: transfer_money_amount_of_money
                value: "50"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_final_confirmation
      - user: "yes"
        assertions:
          - bot_uttered:
              utter_name: utter_transfer_complete
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Money transfer in which the user refuses to answer the amount question and
# then abandons the flow altogether.
test_cases:
  - test_case: user tries to skip a question and then cancels the flow (hard)
    steps:
      - user: "send money to John"
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: "John"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: "i don't want to answer this"
        assertions:
          # A cancel flow command is predicted here instead of skip question.
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: "i don't want to continue"
        assertions:
          - bot_uttered:
              utter_name: utter_flow_cancelled_rasa
          - bot_uttered:
              utter_name: utter_can_do_something_else
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Money transfer in which the user declines the amount question twice before
# finally answering it and confirming the transfer.
test_cases:
  - test_case: user tries to skip a question multiple times (hard)
    steps:
      - user: send money to John
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: John
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: i don't want to answer this for now
        assertions:
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: i don't want to answer this
        # cancel flow instead of skip question is predicted
        assertions:
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: okay 50
        assertions:
          - slot_was_set:
              # The slot is `transfer_money_amount_of_money`; the `utter_ask_`
              # prefix belongs to the response that asks for it, not the slot.
              - name: transfer_money_amount_of_money
                value: "50"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_final_confirmation
      - user: "yes"
        assertions:
          - bot_uttered:
              utter_name: utter_transfer_complete
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Money transfer in which the user declines the amount question once, then
# answers it and confirms the transfer.
test_cases:
  - test_case: user tries to skip a question once (hard)
    steps:
      - user: send money to John
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: John
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: i don't want to answer this
        assertions:
          # cancel flow instead of skip question is predicted
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money
      - user: okay 50
        assertions:
          - slot_was_set:
              # The slot is `transfer_money_amount_of_money`; the `utter_ask_`
              # prefix belongs to the response that asks for it, not the slot.
              - name: transfer_money_amount_of_money
                value: "50"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_final_confirmation
      - user: "yes"
        assertions:
          - bot_uttered:
              utter_name: utter_transfer_complete
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Instead of a clarify command, a start flow command for "list_contact" is
# predicted.

fixtures:
  - route_to_calm:
      - route_session_to_calm: True

test_cases:
  - test_case: user sends short noun only message
    fixtures:
      - route_to_calm
    steps:
      - user: "contact"
        assertions:
          - bot_uttered:
              utter_name: utter_clarification_options_rasa
      - user: "add"
        assertions:
          - bot_uttered:
              utter_name: utter_ask_add_contact_handle
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# The recurrent_payment_type slot is not mapped to "standing order" when the
# user types the incomplete value "stand order".

test_cases:
  - test_case: user wants to set up a new recurrent payment, but specifies the type incompletely, example 3
    steps:
      - user: "I want to set up a new recurrent payment"
        assertions:
          - bot_uttered:
              utter_name: utter_ask_recurrent_payment_type
      - user: "stand order"
        assertions:
          - slot_was_set:
              - name: recurrent_payment_type
                value: "standing order"
          - bot_uttered:
              utter_name: utter_ask_recipient
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Voter registration request from a user whose fixture marks them as not
# based in California; the bot is expected to answer utter_cannot_answer.
fixtures:
  - non_california_resident:
      - based_in_california: False
      - route_session_to_calm: True

test_cases:
  - test_case: Register to vote for non-California resident (should not trigger)
    fixtures:
      - non_california_resident
    steps:
      - user: "I want to register to vote"
        # A ChitChat command is predicted here instead of no command at all.
        assertions:
          - bot_uttered:
              utter_name: utter_cannot_answer
Loading
Loading