From 01686c9c622565cfd198e99ef0f49032e357fccd Mon Sep 17 00:00:00 2001
From: Tristan Vanderbruggen
Date: Tue, 5 Mar 2024 03:20:57 +0000
Subject: [PATCH] Added markdown files for docs.

---
 README.md        | 187 +++---------------------
 docs/language.md |  48 ++++++++
 docs/roadmap.md  |  30 +++++
 docs/setup.md    |  36 ++++++
 docs/tutorial.md |   4 +
 docs/usage.md    |  71 ++++++++++++
 share/wip.ipynb  | 293 ++++++++++++++++-------------------------
 tests/.gitignore |   1 +
 8 files changed, 300 insertions(+), 370 deletions(-)
 create mode 100644 docs/language.md
 create mode 100644 docs/roadmap.md
 create mode 100644 docs/setup.md
 create mode 100644 docs/tutorial.md
 create mode 100644 docs/usage.md
 create mode 100644 tests/.gitignore

diff --git a/README.md b/README.md
index 2e62bda..39b8459 100644
--- a/README.md
+++ b/README.md
@@ -1,184 +1,23 @@
 ⚙ Automaton & Cognition
 =============================
-[![PIP](https://github.com/LLNL/AutoCog/workflows/pip/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions)
-[![Frontend](https://github.com/LLNL/AutoCog/workflows/frontend/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions)
-[![CLI](https://github.com/LLNL/AutoCog/actions/workflows/cli.yml/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions)
-
-Auotmaton & Cognition explores mechanisms to build automaton that drive cognitive processes.
-To this end, we defined a programming model, Structured Thoughts, with a language that compiles to a set of automaton.
-
-## Structured Thoughts
-
-In the Structured Thoughts programming model, prompts are akin to the building blocks of traditional computer programs.
-Prompts are compiled to automaton that ensure that the resulting completion can be parsed to extract structured data.
-Branching between prompts is controlled by the language model.
-The dataflow is statically defined and executed when instantiating the automaton of each prompt.
-Calls (to other prompts or python tools) are executed during the dataflow phase.
-
-Below, we show a single prompt program which implement Chain-of-Thoughts (CoT) to answer a multiple choice question.
-In this examples, the language model is presented with the `topic`, the `question`, and four `choices`.
-It can then think using one to ten `thought` (up 20 tokens for each).
-Eventually, the model must indicate the index of the correct choice.
-
-```
-format thought {
-  is text<20>;
-  annotate f"a short text representing a single thought, it does not have to be a proper sentence.";
-}
-
-prompt main {
-  is {
-    topic is text<20>;
-    question is text<50>;
-    choices[4] is text<40>;
-    work[1:10] is thought;
-    answer is select(.choices);
-  }
-  channel {
-    to .topic from ?topic;
-    to .question from ?question;
-    to .choices from ?choices;
-  }
-  return {
-    from .answer;
-  }
-  annotate {
-    _ as "You are answering a multiple choice questionnaire.";
-    .topic as "the general category from which the question was taken";
-    .question as "the question that you have to answer";
-    .choices as "the four possible choices to answer the question, only one is correct";
-    .work as "show your work step-by-step";
-    .answer as "you pick the index of the choice that best answer the question";
-  }
-}
-```
-
-We are developing the [MCQ](./library/mcq) library of program to illustrate thought patterns that are achievable using Structured Thoughts.
-
-## Getting started
-
-### Install
-
-As simple as `pip install -U git+https://github.com/LLNL/AutoCog`.
- -But, you'll probably want to clone the repository to get the library of programs: -``` -git clone https://github.com/LLNL/AutoCog -pip install -U ./AutoCog -``` - -### LLM Setup - -#### LLama.cpp and GGUF models - -We download model from [TheBloke](https://huggingface.co/TheBloke) on Hugging Face. -For example, you can donwload LlaMa 2 with 7B parameters and tuned for Chat with: -``` -wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf -``` -This is a 4 bits version, aka `Q4_K_M` in the name. It is the main model we use for testing. - -To run GGUF model, we use a [modified version](https://github.com/tristanvdb/llama-cpp-python/tree/choice-dev) of the `llama-cpp-python` package. -It provides python bindings and will build `LLama.cpp`. -Our changes permit us to implement `greedy` completion (returning logprob for all tokens). -``` -pip install -y git+https://github.com/tristanvdb/llama-cpp-python@choice-dev -``` - -> TODO v0.5: connect to low-level API in `llama-cpp-python` so that we can use the default release - -#### HuggingFace Transformers - -> TODO v0.6: connection for HuggingFace Transformers package (use to have it but not tested) - -### Inside a Notebook - -Most of the development is done inside Python notebook (jupiterlab). -Eventually, several notebooks demonstrating various part of AutoCog will be provided in the [share](./share) folder. -To get an idea of our progress, take a look at the [WIP Notebook](./share/wip.ipynb). - -### Command line - -We are building a command line tool to use AutoCog. - -`python3 -m autocog --help` - -``` -usage: __main__.py [-h] [--version] [--orch ORCH] [--gguf GGUF] [--gguf-ctx GGUF_CTX] [--syntax SYNTAX] [--cogs COGS] [--command COMMAND] [--output OUTPUT] [--prefix PREFIX] [--serve] [--host HOST] [--port PORT] [--debug] - -optional arguments: - -h, --help show this help message and exit - --version show program's version number and exit - --orch ORCH Type of orchestrator: `serial` or `async`. (default: serial) - --gguf GGUF Load a model from a GGUF file using llama.cpp (and llama-cpp-python) (default: None) - --gguf-ctx GGUF_CTX Context size for GGUF models (default: 4096) - --syntax SYNTAX One of `Llama-2-Chat`, `ChatML`, `Guanaco` or a dictionary of the kwargs to initialize a Syntax object (inlined JSON or path to a file). (default: None) - --cogs COGS Files to load as cog in the architecture, prefix with its identifier else the filename is used. For example, `some/cognitive/mcq.sta` and `my.tool:some/python/tool.py` will load a Structured Thought - Automaton as `mcq` and a Python file as `my.tool`. (default: None) - --command COMMAND Command to be executed by the architecture as a dictionary. `__tag` identify the cog while `__entry` identify the entry point in this cog (defaults to `main`). All other field will be forwarded as - keyworded args. Example: `{ "__tag" : "writer", "__entry" : "main", **kwarg }` (inlined JSON or path to a file). Can also provide one or more list of dictionary. (default: None) - --output OUTPUT Directory where results are stored. (default: /home/tristan/projects/LLM/AutoCog) - --prefix PREFIX String to identify this instance of AutoCog (default: autocog) - --serve Whether to launch the flask server. (default: False) - --host HOST Host for flask server. (default: localhost) - --port PORT Port for flask server. (default: 5000) - --debug Whether to run the flask server in debug mode. 
(default: False)
-```
-
-Some examples:
-```
-python3 -m autocog --gguf /data/models/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf --syntax Guanaco \
-    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
-    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
-```
-```
-python3 -m autocog --gguf /data/models/llama-2-7b-chat.Q4_K_M.gguf --syntax Llama-2-Chat \
-    --syntax '{ "prompt_with_format" : false, "prompt_with_index" : false, "prompt_indent" : "" }' \
-    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
-    --cogs mmlu.select_cot:library/mmlu-exams/select-cot.sta \
-    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }' \
-    --command '{ "__tag" : "mmlu.select_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
-```
-
-Currently, the AutoCog application only saves the output of the commands in a JSON file.
-
-> TODO v0.5: saving the "pages"
-
-### Web Application
-
-The goal is to provide a development environment.
-Particularly, the ability to inspect and edit/replay `frames`.
-These are created for each execution of an `Automaton` (nested when an `Automaton` call another `Automaton`).
-Upon ending, the execution trace of the `Automaton` is saved in the corresponding frame.
-
-Eventually, we want to use these traces for two purposes:
- - replay: edit part of the trace then restart the program from that point
- - finetuning: select "succesful" frames to finetune models
-
-Run the command below at the root of the repository to launch a server. It uses [quart](http://pgjones.gitlab.io/quart).
-```
-python3 -m autocog --serve --host 0.0.0.0 --port 5000 --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta
-```
-
-### Testing
-
-Currently only pushes to selected branches trigger GitHub actions.
-The results for `master` are shown at the top of this README.
-
-We run three tests:
- - `pip install`
 - - Structured Thoughts frontend (parsing some non-sensical but lexicographically correct sample of the language)
 - - AutoCog CLI to load the MMLU-Exams and run a very simple query
-
-Currently, tests involving a model use the Random Language Model ([see rambling here](./tests/cli-mmlu.sh)).
-Looking for alternative to making the GitHub action download Llama 2 (7b, Chat, Q4_K_M) which I use for testing.
-
 | | PIP | Frontend | CLI |
 |---|---|---|---|
 | `master` | [![PIP](https://github.com/LLNL/AutoCog/workflows/pip/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions) | [![Frontend](https://github.com/LLNL/AutoCog/workflows/frontend/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions) | [![CLI](https://github.com/LLNL/AutoCog/actions/workflows/cli.yml/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions) |
 | `devel` | [![PIP](https://github.com/LLNL/AutoCog/workflows/pip/badge.svg?branch=devel)](https://github.com/LLNL/AutoCog/actions) | [![Frontend](https://github.com/LLNL/AutoCog/workflows/frontend/badge.svg?branch=devel)](https://github.com/LLNL/AutoCog/actions) | [![CLI](https://github.com/LLNL/AutoCog/actions/workflows/cli.yml/badge.svg?branch=devel)](https://github.com/LLNL/AutoCog/actions) |
+Automaton & Cognition explores mechanisms to build automata that control applications driven by auto-regressive language models.
+To this end, we defined a programming model, Structured Thoughts, with a language that compiles to a set of automata.
+
+We broke down the documentation into a few files:
+ - [setup](./docs/setup.md)
+ - [usage](./docs/usage.md)
+ - [language](./docs/language.md)
+ - [tutorial](./docs/tutorial.md)
+ - [roadmap](./docs/roadmap.md)
+
+The libraries have [their own documentation](./library/README.md).
+
 ## Contributing
 
 Contributions are welcome!
@@ -186,7 +25,7 @@ Contributions are welcome!
 So far there is only one rule: **linear git history** (no merge commits).
 Only the master branch have stable commits, other branches might be rebased without notice.
 
-Version number should increase for each push to master and have a matching tag.
+Version number should increase for each push to `master` and have a matching tag.
 
 ## License
 
diff --git a/docs/language.md b/docs/language.md
new file mode 100644
index 0000000..26fe5ed
--- /dev/null
+++ b/docs/language.md
@@ -0,0 +1,48 @@
+Structured Thoughts
+===================
+
+In the Structured Thoughts programming model, prompts are akin to the building blocks of traditional computer programs.
+Prompts are compiled to automata that ensure that the resulting completion can be parsed to extract structured data.
+Branching between prompts is controlled by the language model.
+The dataflow is statically defined and executed when instantiating the automaton of each prompt.
+Calls (to other prompts or Python tools) are executed during the dataflow phase.
+
+Below, we show a single-prompt program which implements Chain-of-Thought (CoT) to answer a multiple choice question.
+In this example, the language model is presented with the `topic`, the `question`, and four `choices`.
+It can then think using one to ten `thought` entries (up to 20 tokens each).
+Eventually, the model must indicate the index of the correct choice.
+
+```
+format thought {
+  is text<20>;
+  annotate f"a short text representing a single thought, it does not have to be a proper sentence.";
+}
+
+prompt main {
+  is {
+    topic is text<20>;
+    question is text<50>;
+    choices[4] is text<40>;
+    work[1:10] is thought;
+    answer is select(.choices);
+  }
+  channel {
+    to .topic from ?topic;
+    to .question from ?question;
+    to .choices from ?choices;
+  }
+  return {
+    from .answer;
+  }
+  annotate {
+    _ as "You are answering a multiple choice questionnaire.";
+    .topic as "the general category from which the question was taken";
+    .question as "the question that you have to answer";
+    .choices as "the four possible choices to answer the question, only one is correct";
+    .work as "show your work step-by-step";
+    .answer as "you pick the index of the choice that best answer the question";
+  }
+}
+```
+
+We are developing the [MCQ](../library/mcq) library of programs to illustrate thought patterns that are achievable using Structured Thoughts.
\ No newline at end of file
diff --git a/docs/roadmap.md b/docs/roadmap.md
new file mode 100644
index 0000000..8e1503d
--- /dev/null
+++ b/docs/roadmap.md
@@ -0,0 +1,30 @@
+Roadmap
+=======
+
+This is a roadmap of basic features that are needed to make AutoCog (and STA) usable.
+It only considers a few weeks' worth of work, but I rarely have brain-cycles to work on this project.
+
+Given that I am currently working alone on this project, I am not tracking work using issues and milestones.
+
+In the roadmap below, each minor version consolidates the increments of the previous ones.
+Simply put, all the `v0.4.X` releases are steps toward `v0.5`.
+These bugfix-level milestones are subject to reordering (change of priority) and shifting (introducing new milestones or actual bugfixes).
+
+| Version | Features | Notes | Tracking |
+| ------- | -------- | ----- | -------- |
+| v0.4    | Structured Thoughts | release 1st version of ST | |
+| v0.4.1  | Tests & Fixes | Testing more LLMs and fixing tokenization issues | |
+| v0.4.2  | Roadmap & Doc | Needed some organization... | |
+| v0.4.3  | Low-Level llama-cpp-python | | |
+| v0.4.4  | FTA: Simplify, Choice Limit, and Norms | | |
+| v0.4.5  | Beam Search | Implementation within FTA | |
+| v0.5    | Language Docs | Description of the language and tutorial | |
+| v0.5.1  | Tests & Fixes | Expecting that it will be needed... | |
+| v0.5.2  | Unified FTA | FTA in one "loop" using the llama-cpp-python low-level API | |
+| v0.5.3  | Elementary | Library of elementary "worksheets" (arithmetic: add/mul/div, literacy: spelling, grammar, comprehension) | |
+| v0.5.4  | MMLU-Exams | Library of MCQ solvers using different Thought Patterns | |
+| v0.5.5  | FTA to BNF | Translate FTA to llama.cpp BNF | |
+| v0.6    | Benchmarking | Evaluate speed and accuracy on Elementary and MMLU-Exams | |
+| v0.6.1  | Tooling Benchmark | | |
+| v0.7    | Finetuning | Finetune selected foundation LLMs, targeting improved performance on MMLU-Exams | |
+| v0.7.1  | Finetune Tooling | | |
diff --git a/docs/setup.md b/docs/setup.md
new file mode 100644
index 0000000..7211275
--- /dev/null
+++ b/docs/setup.md
@@ -0,0 +1,36 @@
+Setup
+=====
+
+## Install AutoCog
+
+As simple as `pip install -U git+https://github.com/LLNL/AutoCog`.
+
+But you'll probably want to clone the repository to get the library of programs:
+```
+git clone https://github.com/LLNL/AutoCog
+pip install -U ./AutoCog
+```
+
+## LLM Setup
+
+### Llama.cpp and GGUF models
+
+We download models from [TheBloke](https://huggingface.co/TheBloke) on Hugging Face.
+For example, you can download Llama 2 with 7B parameters, tuned for chat, with:
+```
+wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+```
+This is a 4-bit quantized version, aka `Q4_K_M` in the name. It is the main model we use for testing.
+
+To run GGUF models, we use a [modified version](https://github.com/tristanvdb/llama-cpp-python/tree/choice-dev) of the `llama-cpp-python` package.
+It provides Python bindings and will build `llama.cpp`.
+Our changes permit us to implement `greedy` completion (returning logprobs for all tokens).
+```
+pip install git+https://github.com/tristanvdb/llama-cpp-python@choice-dev
+```
+
+> TODO v0.5: connect to the low-level API in `llama-cpp-python` so that we can use the default release
+
+### HuggingFace Transformers
+
+> TODO v0.6: connection for the HuggingFace Transformers package (we used to have it, but it is not tested)
\ No newline at end of file
diff --git a/docs/tutorial.md b/docs/tutorial.md
new file mode 100644
index 0000000..82dd4f8
--- /dev/null
+++ b/docs/tutorial.md
@@ -0,0 +1,4 @@
+Tutorial
+========
+
+
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 0000000..87cb07b
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,71 @@
+Usage
+=====
+
+## Inside a Notebook
+
+Most of the development is done inside Python notebooks (JupyterLab).
+Eventually, several notebooks demonstrating various parts of AutoCog will be provided in the [share](../share) folder.
+To get an idea of our progress, take a look at the [WIP Notebook](../share/wip.ipynb).
+
+## Command line
+
+We are building a command line tool to use AutoCog.
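+It covers, from the shell, the same steps the notebooks perform through the `CogArch` Python API.
+As a point of reference, here is a minimal sketch of that API; the calls mirror the [WIP Notebook](../share/wip.ipynb), but the import paths are assumptions, so treat this as illustrative rather than as a documented interface:
+```
+# Minimal sketch: `loader`, `CogArch`, `load`, and the awaitable call all
+# appear in share/wip.ipynb, but the import paths below are assumed.
+from autocog import CogArch          # assumed import path
+from autocog.utility import loader   # hypothetical import path for the notebook's loader
+
+(lm, syntax) = loader(models_path='/data/models/llama-2-7b-chat.Q4_K_M.gguf')
+arch = CogArch(lm=lm, syntax=syntax)
+arch.load('mcq.cot', filepath='library/mcq/repeat-cot.sta')  # a Structured Thoughts program
+
+# Cogs run asynchronously; in a notebook you can simply `await` them:
+result = await arch('mcq.cot', topic='arithmetic',
+                    question='What is 3*4+9?',
+                    choices=['16', '21', '39', '42'])
+```
+The flags below map onto the same steps: picking a model and syntax, loading cogs, and issuing commands.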
+
+`python3 -m autocog --help`
+
+```
+usage: __main__.py [-h] [--version] [--orch ORCH] [--gguf GGUF] [--gguf-ctx GGUF_CTX] [--syntax SYNTAX] [--cogs COGS] [--command COMMAND] [--output OUTPUT] [--prefix PREFIX] [--serve] [--host HOST] [--port PORT] [--debug]
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --version            show program's version number and exit
+  --orch ORCH          Type of orchestrator: `serial` or `async`. (default: serial)
+  --gguf GGUF          Load a model from a GGUF file using llama.cpp (and llama-cpp-python) (default: None)
+  --gguf-ctx GGUF_CTX  Context size for GGUF models (default: 4096)
+  --syntax SYNTAX      One of `Llama-2-Chat`, `ChatML`, `Guanaco` or a dictionary of the kwargs to initialize a Syntax object (inlined JSON or path to a file). (default: None)
+  --cogs COGS          Files to load as cog in the architecture, prefix with its identifier else the filename is used. For example, `some/cognitive/mcq.sta` and `my.tool:some/python/tool.py` will load a Structured Thought
+                       Automaton as `mcq` and a Python file as `my.tool`. (default: None)
+  --command COMMAND    Command to be executed by the architecture as a dictionary. `__tag` identify the cog while `__entry` identify the entry point in this cog (defaults to `main`). All other field will be forwarded as
+                       keyworded args. Example: `{ "__tag" : "writer", "__entry" : "main", **kwarg }` (inlined JSON or path to a file). Can also provide one or more list of dictionary. (default: None)
+  --output OUTPUT      Directory where results are stored. (default: /home/tristan/projects/LLM/AutoCog)
+  --prefix PREFIX      String to identify this instance of AutoCog (default: autocog)
+  --serve              Whether to launch the flask server. (default: False)
+  --host HOST          Host for flask server. (default: localhost)
+  --port PORT          Port for flask server. (default: 5000)
+  --debug              Whether to run the flask server in debug mode. (default: False)
+```
+
+Some examples:
+```
+python3 -m autocog --gguf /data/models/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf --syntax Guanaco \
+    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
+    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
+```
+```
+python3 -m autocog --gguf /data/models/llama-2-7b-chat.Q4_K_M.gguf --syntax Llama-2-Chat \
+    --syntax '{ "prompt_with_format" : false, "prompt_with_index" : false, "prompt_indent" : "" }' \
+    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
+    --cogs mmlu.select_cot:library/mmlu-exams/select-cot.sta \
+    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }' \
+    --command '{ "__tag" : "mmlu.select_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
+```
+
+Currently, the AutoCog application only saves the output of the commands in a JSON file.
+
+> TODO v0.5: saving the "pages"
+
+## Web Application
+
+The goal is to provide a development environment.
+In particular, we want the ability to inspect and edit/replay `frames`.
+These are created for each execution of an `Automaton` (nested when an `Automaton` calls another `Automaton`).
+Upon ending, the execution trace of the `Automaton` is saved in the corresponding frame.
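+The nesting is easiest to picture as a tree of frames.
+The sketch below is purely illustrative: the field names are hypothetical and do not describe the actual internal layout.
+```
+# Hypothetical shape of nested frames, for intuition only.
+frame = {
+    'automaton' : 'mmlu.repeat_cot',   # the Automaton this execution ran
+    'trace'     : [ ... ],             # execution trace, saved when the run ends
+    'children'  : [ ... ],             # one frame per nested Automaton call
+}
+```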
+
+Eventually, we want to use these traces for two purposes:
+ - replay: edit part of the trace, then restart the program from that point
+ - finetuning: select "successful" frames to finetune models
+
+Run the command below at the root of the repository to launch a server. It uses [quart](http://pgjones.gitlab.io/quart).
+```
+python3 -m autocog --serve --host 0.0.0.0 --port 5000 --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta
+```
diff --git a/share/wip.ipynb b/share/wip.ipynb
index d2efa68..0bfdef9 100644
--- a/share/wip.ipynb
+++ b/share/wip.ipynb
@@ -37,16 +37,18 @@
 "model_names = [\n",
 "    'llama-2-7b.Q4_K_M', # wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf\n",
 "    'llama-2-7b-chat.Q4_K_M', # wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf\n",
+    "    'llama-2-13b-chat.Q4_K_M', # wget https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf\n",
+    "    'llama-2-13b-chat.Q5_K_M', # wget https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf\n",
 "    'llama-2-13b-chat.Q8_0', # wget https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q8_0.gguf\n",
 "    'tinyllama-1.1b-chat-v0.3.Q4_K_M', # wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf\n",
 "    'tinyllama-2-1b-miniguanaco.Q4_K_M', # wget https://huggingface.co/TheBloke/Tinyllama-2-1b-miniguanaco-GGUF/resolve/main/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf\n",
 "    'capybarahermes-2.5-mistral-7b.Q4_K_M' # wget https://huggingface.co/TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF/resolve/main/capybarahermes-2.5-mistral-7b.Q4_K_M.gguf\n",
 "]\n",
 "(lm,syntax) = loader(\n",
-    "    models_path=f\"/data/models/{model_names[2]}.gguf\",\n",
+    "    models_path=f\"/data/models/{model_names[1]}.gguf\",\n",
 "    prompt_with_format=False,\n",
 "    prompt_with_index=False,\n",
-    "#    prompt_indent='> '\n",
+    "    prompt_indent='  '\n",
 ")"
 ]
 },
@@ -60,16 +62,16 @@
 "arch = CogArch(lm=lm, syntax=syntax)\n",
 "\n",
 "mmlu_cogs = [\n",
-    "    arch.load('mmlu-repeat', filepath=f\"{autocog_home}/library/mmlu-exams/repeat.sta\"),\n",
-    "    arch.load('mmlu-repeat-cot', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-cot.sta\"),\n",
-    "    arch.load('mmlu-repeat-hyp', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-hyp.sta\"),\n",
-    "    arch.load('mmlu-repeat-iter', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-iter.sta\"),\n",
-    "    arch.load('mmlu-repeat-annot', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-annot.sta\"),\n",
-    "    arch.load('mmlu-select', filepath=f\"{autocog_home}/library/mmlu-exams/select.sta\"),\n",
-    "    arch.load('mmlu-select-cot', filepath=f\"{autocog_home}/library/mmlu-exams/select-cot.sta\"),\n",
-    "    arch.load('mmlu-select-hyp', filepath=f\"{autocog_home}/library/mmlu-exams/select-hyp.sta\"),\n",
-    "    arch.load('mmlu-select-iter', filepath=f\"{autocog_home}/library/mmlu-exams/select-iter.sta\"),\n",
-    "    arch.load('mmlu-select-annot', filepath=f\"{autocog_home}/library/mmlu-exams/select-annot.sta\")\n",
+    "    arch.load('mcq.repeat.base', filepath=f\"@mcq/repeat.sta\"),\n",
+    "    arch.load('mcq.repeat.cot', filepath=f\"@mcq/repeat-cot.sta\"),\n",
+    "    arch.load('mcq.repeat.hyp', filepath=f\"@mcq/repeat-hyp.sta\"),\n",
+    "    arch.load('mcq.repeat.iter', filepath=f\"@mcq/repeat-iter.sta\"),\n",
+    "    arch.load('mcq.repeat.annot', filepath=f\"@mcq/repeat-annot.sta\"),\n",
+    "    arch.load('mcq.select.base', 
filepath=f\"@mcq/select.sta\"),\n", + " arch.load('mcq.select.cot', filepath=f\"@mcq/select-cot.sta\"),\n", + " arch.load('mcq.select.hyp', filepath=f\"@mcq/select-hyp.sta\"),\n", + " arch.load('mcq.select.iter', filepath=f\"@mcq/select-iter.sta\"),\n", + " arch.load('mcq.select.annot', filepath=f\"@mcq/select-annot.sta\")\n", "]\n", "\n", "mmlu_data = [\n", @@ -81,8 +83,8 @@ "]\n", "\n", "arith_cogs = [\n", - " arch.load('arithmetic-multiply-single', filepath=f\"{autocog_home}/library/arithmetic/multiply-single.sta\"),\n", - " arch.load('arithmetic-multiply-chain', filepath=f\"{autocog_home}/library/arithmetic/multiply-chain.sta\")\n", + " arch.load('elementary.multiply.single', filepath=f\"@elementary/multiply-single.sta\"),\n", + " arch.load('elementary.multiply.chain', filepath=f\"@elementary/multiply-chain.sta\")\n", "]\n", "\n", "arith_data = [\n", @@ -114,7 +116,7 @@ { "data": { "text/plain": [ - "['39', '21', '42', '21', '16', 3, 2, 3, 3, 2]" + "['42', '21', '42', '16', '42', 2, 2, 2, 2, 2]" ] }, "execution_count": 5, @@ -124,9 +126,10 @@ ], "source": [ "tags = [\n", - " 'mmlu-repeat', 'mmlu-repeat-cot', 'mmlu-repeat-hyp', 'mmlu-repeat-iter', 'mmlu-repeat-annot',\n", - " 'mmlu-select', 'mmlu-select-cot', 'mmlu-select-hyp', 'mmlu-select-iter', 'mmlu-select-annot'\n", + " 'mcq.repeat.base', 'mcq.repeat.cot', 'mcq.repeat.hyp', 'mcq.repeat.iter', 'mcq.repeat.annot',\n", + " 'mcq.select.base', 'mcq.select.cot', 'mcq.select.hyp', 'mcq.select.iter', 'mcq.select.annot'\n", "]\n", + "# tags = [ 'mmlu-repeat-cot', 'mmlu-select-cot' ]\n", "results = [ await arch(tag, **data) for tag in tags for data in mmlu_data ]\n", "# arith_results = [ await cog(**data) for cog in arith_cogs for data in arith_data ]\n", "results" @@ -134,7 +137,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "id": "4fd250b0-58d1-4d09-8a8d-d860af8a8493", "metadata": {}, "outputs": [ @@ -142,7 +145,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "p=0.5946437829906105\n", + "p=0.6638551711390864\n", "---------------------\n", "\n", "[INST] <>\n", @@ -152,11 +155,11 @@ "You are using the following syntax:\n", "```\n", "start:\n", - "> topic(text(20)): the general category from which the question was taken\n", - "> question(text(50)): the question that you have to answer\n", - "> choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", - "> work(thought)[1:10]: show your work step-by-step\n", - "> answer(repeat(choices)): you repeat verbatim the choice that best answer the question\n", + " topic(text(20)): the general category from which the question was taken\n", + " question(text(50)): the question that you have to answer\n", + " choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", + " work(thought)[1:10]: show your work step-by-step\n", + " answer(select(choices)): you pick the index of the choice that best answer the question\n", "next: select which of return will be the next step.\n", "```\n", "It includes the folowing named formats:\n", @@ -164,19 +167,21 @@ " - a short text representing a single thought, it does not have to be a proper sentence.\n", "[/INST]\n", "start:\n", - "> topic: arithmetic\n", - "> question: What is 3*4+9?\n", - "> choices: 16\n", - "> choices: 21\n", - "> choices: 39\n", - "> choices: 42\n", - "> work: \n", - "> answer: 42\n", - "next: return\n", + " topic: arithmetic\n", + " question: What is 3*4+9?\n", + " choices: 16\n", + " choices: 21\n", + " choices: 39\n", + " 
choices: 42\n", + " work:\n", + " work: 3*4 = 12\n", + " work: 12 + 9 = 21\n", + " answer: 3\n", + " next: return\n", "\n", "============================\n", "\n", - "p=0.6176012829154562\n", + "p=0.7895269044180848\n", "---------------------\n", "\n", "[INST] <>\n", @@ -186,11 +191,11 @@ "You are using the following syntax:\n", "```\n", "start:\n", - "> topic(text(20)): the general category from which the question was taken\n", - "> question(text(50)): the question that you have to answer\n", - "> choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", - "> work(thought)[1:10]: show your work step-by-step\n", - "> answer(repeat(choices)): you repeat verbatim the choice that best answer the question\n", + " topic(text(20)): the general category from which the question was taken\n", + " question(text(50)): the question that you have to answer\n", + " choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", + " work(thought)[1:10]: show your work step-by-step\n", + " answer(select(choices)): you pick the index of the choice that best answer the question\n", "next: select which of return will be the next step.\n", "```\n", "It includes the folowing named formats:\n", @@ -198,17 +203,17 @@ " - a short text representing a single thought, it does not have to be a proper sentence.\n", "[/INST]\n", "start:\n", - "> topic: arithmetic\n", - "> question: What is 3*4+9?\n", - "> choices: 16\n", - "> choices: 21\n", - "> choices: 39\n", - "> choices: 42\n", - "> work: \n", - "> work: 3*4 = 12\n", - "> work: 12 + 9 = 21\n", - "> answer: 21\n", - "next: return\n", + " topic: arithmetic\n", + " question: What is 3*4+9?\n", + " choices: 16\n", + " choices: 21\n", + " choices: 39\n", + " choices: 42\n", + " work:\n", + " work: 3*4 = 12\n", + " work: 12 + 9 = 21\n", + " answer: 2\n", + " next: return\n", "\n", "============================\n", "\n" @@ -216,185 +221,81 @@ } ], "source": [ + "# TODO inspect function...\n", "import numpy\n", "scoring = lambda probas: numpy.power(numpy.prod(probas), 1./len(probas))\n", - "texts = arch.orchestrator.pages[2].ftts['main'][-1].results(lm)\n", + "texts = arch.orchestrator.pages[-8].ftts['main'][-1].results(lm)\n", "for text in texts[-2:]:\n", " print(f\"p={text[1]}\\n---------------------\\n\\n{text[0]}\\n\\n============================\\n\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "dd5ec2ff-e157-4e82-bba6-4c5b11f52aea", "metadata": {}, "outputs": [], "source": [ - "arch.orchestrator.pages[-1].ftts['main'][-1].toGraphViz(lm)" + "# arch.orchestrator.pages[-1].ftts['main'][-1].toGraphViz(lm)" ] }, { "cell_type": "raw", - "id": "057d8ed3-6f73-48aa-ba54-4dea2adfe325", + "id": "0d79deeb-6d55-4223-b77c-20064340e49d", "metadata": { "tags": [] }, "source": [ - "# source = \"\"\"\n", - "# format bool {\n", - "# is enum(\"true\",\"false\");\n", - "# }\n", - "# prompt main {\n", - "# is {\n", - "# A is bool;\n", - "# B[0:2] is {\n", - "# C is bool;\n", - "# D[2:4] is bool;\n", - "# }\n", - "# E[0:2] is {\n", - "# F is bool;\n", - "# }\n", - "# G is {\n", - "# H is bool;\n", - "# I is bool;\n", - "# }\n", - "# J is bool;\n", - "# }\n", - "# channel {\n", - "# to .B.C from ?inC;\n", - "# to .E from ?inE;\n", - "# to .G from ?inG;\n", - "# }\n", - "# }\"\"\"\n", - "# samples = [\n", - "# (\n", - "# {},\n", - "# {\n", - "# \"inC\" : [ 'true', 'false' ],\n", - "# \"inE\" : [ { 'F' : 'true' } ],\n", - "# \"inG\" : { 'H' : 'true' }\n", - "# }\n", - "# ),\n", - 
"# (\n", - "# {},\n", - "# {\n", - "# \"inC\" : [ 'true' ],\n", - "# \"inE\" : [],\n", - "# \"inG\" : { 'H' : 'true' }\n", - "# }\n", - "# )\n", - "# ]\n", + "# This was a stress test for compilation phases (after frontend):\n", + "# - IR -> abstract (compile-time)\n", + "# - abstract -> concrete (compile-time)\n", + "# - concrete -> finite (runtime: need dataflow)\n", + "# Need to be moved to a test.\n", "\n", "source = \"\"\"\n", + "format bool {\n", + " is enum(\"true\",\"false\");\n", + "}\n", "prompt main {\n", " is {\n", - " topic is text<20>;\n", - " question is text<50>;\n", - " choices[4] is {\n", - " value is text<40>;\n", - " correct is enum(\"yes\",\"no\");\n", + " A is bool;\n", + " B[0:2] is {\n", + " C is bool;\n", + " D[2:4] is bool;\n", " }\n", - " answer is repeat(.choices.value);\n", - " }\n", - " channel {\n", - " to .topic from ?topic;\n", - " to .question from ?question;\n", - " to .choices.value from ?choices;\n", - " }\n", - " return {\n", - " from .answer;\n", - " }\n", - " annotate {\n", - " _ as \"You are answering a multiple choice questionnaire.\";\n", - " .topic as \"the general category from which the question was taken\";\n", - " .question as \"the question that you have to answer\";\n", - " .choices as \"you judge whether each choice is correct or not\";\n", - " .choices.value as \"the value of the choice\";\n", - " .choices.correct as \"you decide whether this choice is correct or not\";\n", - " .answer as \"you repeat the value of the choice that best answer the question\";\n", - " }\n", - "}\"\"\"\n", - "samples = [\n", - " (\n", - " {},\n", - " {\n", - " \"topic\" : \"the topic of the question\",\n", - " \"question\" : \"an exmaple question to show how it compiles\",\n", - " \"choices\" : [\n", - " \"the first choice\",\n", - " \"the 2nd choice\",\n", - " \"yet another 3rd choice\",\n", - " \"final 4th choice\"\n", - " ]\n", + " E[0:2] is {\n", + " F is bool;\n", " }\n", - " )\n", - "]\n", - "\n", - "source = \"\"\"\n", - "prompt main {\n", - " is {\n", - " choices[4] is text<40>;\n", - " thought[1:3] is text<5>;\n", - " answer is repeat(.choices);\n", + " G is {\n", + " H is bool;\n", + " I is bool;\n", + " }\n", + " J is bool;\n", " }\n", " channel {\n", - " to .choices from ?choices;\n", - " }\n", - " return {\n", - " from .answer;\n", - " }\n", - " annotate {\n", - " _ as \"You are answering a multiple choice questionnaire.\";\n", - " .choices as \"possible choices\";\n", - " .thought as \"think about the choices\";\n", - " .answer as \"repeat the correct choice\";\n", + " to .B.C from ?inC;\n", + " to .E from ?inE;\n", + " to .G from ?inG;\n", " }\n", "}\"\"\"\n", "samples = [\n", " (\n", - " {},\n", - " {\n", - " \"choices\" : [\n", - " \"first\",\n", - " \"second\",\n", - " \"third\",\n", - " \"fourth\"\n", - " ]\n", - " }\n", + " {},\n", + " {\n", + " \"inC\" : [ 'true', 'false' ],\n", + " \"inE\" : [ { 'F' : 'true' } ],\n", + " \"inG\" : { 'H' : 'true' }\n", + " }\n", + " ),\n", + " (\n", + " {},\n", + " {\n", + " \"inC\" : [ 'true' ],\n", + " \"inE\" : [],\n", + " \"inG\" : { 'H' : 'true' }\n", + " }\n", " )\n", - "]\n", - "\n", - "# source = \"\"\"\n", - "# prompt main {\n", - "# is {\n", - "# choices[4] is text<40>;\n", - "# answer is repeat(.choices);\n", - "# }\n", - "# channel {\n", - "# to .choices from ?choices;\n", - "# }\n", - "# return {\n", - "# from .answer;\n", - "# }\n", - "# annotate {\n", - "# _ as \"You are answering a multiple choice questionnaire.\";\n", - "# .choices as \"possible choices\";\n", - "# .answer as \"repeat the 
correct choice\";\n", - "# }\n", - "# }\"\"\"\n", - "# samples = [\n", - "# (\n", - "# {},\n", - "# {\n", - "# \"choices\" : [\n", - "# \"first\",\n", - "# \"second\",\n", - "# \"third\",\n", - "# \"fourth\"\n", - "# ]\n", - "# }\n", - "# )\n", - "# ]" + "]" ] } ], diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..ef043bf --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +**/*-results.json