From 01686c9c622565cfd198e99ef0f49032e357fccd Mon Sep 17 00:00:00 2001
From: Tristan Vanderbruggen
Date: Tue, 5 Mar 2024 03:20:57 +0000
Subject: [PATCH] Added markdown files for docs.

---
 README.md        | 187 +++---------------------
 docs/language.md |  48 ++++++++
 docs/roadmap.md  |  30 +++++
 docs/setup.md    |  36 ++++++
 docs/tutorial.md |   4 +
 docs/usage.md    |  71 ++++++++++++
 share/wip.ipynb  | 293 ++++++++++++++++-------------------------
 tests/.gitignore |   1 +
 8 files changed, 300 insertions(+), 370 deletions(-)
 create mode 100644 docs/language.md
 create mode 100644 docs/roadmap.md
 create mode 100644 docs/setup.md
 create mode 100644 docs/tutorial.md
 create mode 100644 docs/usage.md
 create mode 100644 tests/.gitignore

diff --git a/README.md b/README.md
index 2e62bda..39b8459 100644
--- a/README.md
+++ b/README.md
@@ -1,184 +1,23 @@
 ⚙ Automaton & Cognition
 =============================
-[![PIP](https://github.com/LLNL/AutoCog/workflows/pip/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions)
-[![Frontend](https://github.com/LLNL/AutoCog/workflows/frontend/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions)
-[![CLI](https://github.com/LLNL/AutoCog/actions/workflows/cli.yml/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions)
-
-Auotmaton & Cognition explores mechanisms to build automaton that drive cognitive processes.
-To this end, we defined a programming model, Structured Thoughts, with a language that compiles to a set of automaton.
-
-## Structured Thoughts
-
-In the Structured Thoughts programming model, prompts are akin to the building blocks of traditional computer programs.
-Prompts are compiled to automaton that ensure that the resulting completion can be parsed to extract structured data.
-Branching between prompts is controlled by the language model.
-The dataflow is statically defined and executed when instantiating the automaton of each prompt.
-Calls (to other prompts or python tools) are executed during the dataflow phase.
-
-Below, we show a single prompt program which implement Chain-of-Thoughts (CoT) to answer a multiple choice question.
-In this examples, the language model is presented with the `topic`, the `question`, and four `choices`.
-It can then think using one to ten `thought` (up 20 tokens for each).
-Eventually, the model must indicate the index of the correct choice.
-
-```
-format thought {
-  is text<20>;
-  annotate f"a short text representing a single thought, it does not have to be a proper sentence.";
-}
-
-prompt main {
-  is {
-    topic is text<20>;
-    question is text<50>;
-    choices[4] is text<40>;
-    work[1:10] is thought;
-    answer is select(.choices);
-  }
-  channel {
-    to .topic from ?topic;
-    to .question from ?question;
-    to .choices from ?choices;
-  }
-  return {
-    from .answer;
-  }
-  annotate {
-    _ as "You are answering a multiple choice questionnaire.";
-    .topic as "the general category from which the question was taken";
-    .question as "the question that you have to answer";
-    .choices as "the four possible choices to answer the question, only one is correct";
-    .work as "show your work step-by-step";
-    .answer as "you pick the index of the choice that best answer the question";
-  }
-}
-```
-
-We are developing the [MCQ](./library/mcq) library of program to illustrate thought patterns that are achievable using Structured Thoughts.
-
-## Getting started
-
-### Install
-
-As simple as `pip install -U git+https://github.com/LLNL/AutoCog`.
- -But, you'll probably want to clone the repository to get the library of programs: -``` -git clone https://github.com/LLNL/AutoCog -pip install -U ./AutoCog -``` - -### LLM Setup - -#### LLama.cpp and GGUF models - -We download model from [TheBloke](https://huggingface.co/TheBloke) on Hugging Face. -For example, you can donwload LlaMa 2 with 7B parameters and tuned for Chat with: -``` -wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf -``` -This is a 4 bits version, aka `Q4_K_M` in the name. It is the main model we use for testing. - -To run GGUF model, we use a [modified version](https://github.com/tristanvdb/llama-cpp-python/tree/choice-dev) of the `llama-cpp-python` package. -It provides python bindings and will build `LLama.cpp`. -Our changes permit us to implement `greedy` completion (returning logprob for all tokens). -``` -pip install -y git+https://github.com/tristanvdb/llama-cpp-python@choice-dev -``` - -> TODO v0.5: connect to low-level API in `llama-cpp-python` so that we can use the default release - -#### HuggingFace Transformers - -> TODO v0.6: connection for HuggingFace Transformers package (use to have it but not tested) - -### Inside a Notebook - -Most of the development is done inside Python notebook (jupiterlab). -Eventually, several notebooks demonstrating various part of AutoCog will be provided in the [share](./share) folder. -To get an idea of our progress, take a look at the [WIP Notebook](./share/wip.ipynb). - -### Command line - -We are building a command line tool to use AutoCog. - -`python3 -m autocog --help` - -``` -usage: __main__.py [-h] [--version] [--orch ORCH] [--gguf GGUF] [--gguf-ctx GGUF_CTX] [--syntax SYNTAX] [--cogs COGS] [--command COMMAND] [--output OUTPUT] [--prefix PREFIX] [--serve] [--host HOST] [--port PORT] [--debug] - -optional arguments: - -h, --help show this help message and exit - --version show program's version number and exit - --orch ORCH Type of orchestrator: `serial` or `async`. (default: serial) - --gguf GGUF Load a model from a GGUF file using llama.cpp (and llama-cpp-python) (default: None) - --gguf-ctx GGUF_CTX Context size for GGUF models (default: 4096) - --syntax SYNTAX One of `Llama-2-Chat`, `ChatML`, `Guanaco` or a dictionary of the kwargs to initialize a Syntax object (inlined JSON or path to a file). (default: None) - --cogs COGS Files to load as cog in the architecture, prefix with its identifier else the filename is used. For example, `some/cognitive/mcq.sta` and `my.tool:some/python/tool.py` will load a Structured Thought - Automaton as `mcq` and a Python file as `my.tool`. (default: None) - --command COMMAND Command to be executed by the architecture as a dictionary. `__tag` identify the cog while `__entry` identify the entry point in this cog (defaults to `main`). All other field will be forwarded as - keyworded args. Example: `{ "__tag" : "writer", "__entry" : "main", **kwarg }` (inlined JSON or path to a file). Can also provide one or more list of dictionary. (default: None) - --output OUTPUT Directory where results are stored. (default: /home/tristan/projects/LLM/AutoCog) - --prefix PREFIX String to identify this instance of AutoCog (default: autocog) - --serve Whether to launch the flask server. (default: False) - --host HOST Host for flask server. (default: localhost) - --port PORT Port for flask server. (default: 5000) - --debug Whether to run the flask server in debug mode. 
(default: False)
-```
-
-Some examples:
-```
-python3 -m autocog --gguf /data/models/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf --syntax Guanaco \
-    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
-    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
-```
-```
-python3 -m autocog --gguf /data/models/llama-2-7b-chat.Q4_K_M.gguf --syntax Llama-2-Chat \
-    --syntax '{ "prompt_with_format" : false, "prompt_with_index" : false, "prompt_indent" : "" }' \
-    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
-    --cogs mmlu.select_cot:library/mmlu-exams/select-cot.sta \
-    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }' \
-    --command '{ "__tag" : "mmlu.select_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
-```
-
-Currently, the AutoCog application only saves the output of the commands in a JSON file.
-
-> TODO v0.5: saving the "pages"
-
-### Web Application
-
-The goal is to provide a development environment.
-Particularly, the ability to inspect and edit/replay `frames`.
-These are created for each execution of an `Automaton` (nested when an `Automaton` call another `Automaton`).
-Upon ending, the execution trace of the `Automaton` is saved in the corresponding frame.
-
-Eventually, we want to use these traces for two purposes:
- - replay: edit part of the trace then restart the program from that point
- - finetuning: select "succesful" frames to finetune models
-
-Run the command below at the root of the repository to launch a server. It uses [quart](http://pgjones.gitlab.io/quart).
-```
-python3 -m autocog --serve --host 0.0.0.0 --port 5000 --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta
-```
-
-### Testing
-
-Currently only pushes to selected branches trigger GitHub actions.
-The results for `master` are shown at the top of this README.
-
-We run three tests:
- - `pip install`
 - - Structured Thoughts frontend (parsing some non-sensical but lexicographically correct sample of the language)
 - - AutoCog CLI to load the MMLU-Exams and run a very simple query
-
-Currently, tests involving a model use the Random Language Model ([see rambling here](./tests/cli-mmlu.sh)).
-Looking for alternative to making the GitHub action download Llama 2 (7b, Chat, Q4_K_M) which I use for testing.
-
 | | PIP | Frontend | CLI |
 |---|---|---|---|
 | `master` | [![PIP](https://github.com/LLNL/AutoCog/workflows/pip/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions) | [![Frontend](https://github.com/LLNL/AutoCog/workflows/frontend/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions) | [![CLI](https://github.com/LLNL/AutoCog/actions/workflows/cli.yml/badge.svg?branch=master)](https://github.com/LLNL/AutoCog/actions) |
 | `devel` | [![PIP](https://github.com/LLNL/AutoCog/workflows/pip/badge.svg?branch=devel)](https://github.com/LLNL/AutoCog/actions) | [![Frontend](https://github.com/LLNL/AutoCog/workflows/frontend/badge.svg?branch=devel)](https://github.com/LLNL/AutoCog/actions) | [![CLI](https://github.com/LLNL/AutoCog/actions/workflows/cli.yml/badge.svg?branch=devel)](https://github.com/LLNL/AutoCog/actions) |
+Automaton & Cognition explores mechanisms to build automata that control applications driven by auto-regressive language models.
+To this end, we defined a programming model, Structured Thoughts, with a language that compiles to a set of automata.
+
+We broke down the documentation into a few files:
+ - [setup](./docs/setup.md)
+ - [usage](./docs/usage.md)
+ - [language](./docs/language.md)
+ - [tutorial](./docs/tutorial.md)
+ - [roadmap](./docs/roadmap.md)
+
+The libraries have [their own documentation](./library/README.md).
+
 ## Contributing
 
 Contributions are welcome!
@@ -186,7 +25,7 @@ Contributions are welcome!
 So far there is only one rule: **linear git history** (no merge commits).
 Only the master branch have stable commits, other branches might be rebased without notice.
 
-Version number should increase for each push to master and have a matching tag.
+Version number should increase for each push to `master` and have a matching tag.
 
 ## License
 
diff --git a/docs/language.md b/docs/language.md
new file mode 100644
index 0000000..26fe5ed
--- /dev/null
+++ b/docs/language.md
@@ -0,0 +1,48 @@
+Structured Thoughts
+===================
+
+In the Structured Thoughts programming model, prompts are akin to the building blocks of traditional computer programs.
+Prompts are compiled to automata that ensure that the resulting completion can be parsed to extract structured data.
+Branching between prompts is controlled by the language model.
+The dataflow is statically defined and executed when instantiating the automaton of each prompt.
+Calls (to other prompts or Python tools) are executed during the dataflow phase.
+
+Below, we show a single-prompt program which implements Chain-of-Thought (CoT) to answer a multiple choice question.
+In this example, the language model is presented with the `topic`, the `question`, and four `choices`.
+It can then think using one to ten `thought` entries (up to 20 tokens each).
+Eventually, the model must indicate the index of the correct choice.
+
+```
+format thought {
+  is text<20>;
+  annotate f"a short text representing a single thought, it does not have to be a proper sentence.";
+}
+
+prompt main {
+  is {
+    topic is text<20>;
+    question is text<50>;
+    choices[4] is text<40>;
+    work[1:10] is thought;
+    answer is select(.choices);
+  }
+  channel {
+    to .topic from ?topic;
+    to .question from ?question;
+    to .choices from ?choices;
+  }
+  return {
+    from .answer;
+  }
+  annotate {
+    _ as "You are answering a multiple choice questionnaire.";
+    .topic as "the general category from which the question was taken";
+    .question as "the question that you have to answer";
+    .choices as "the four possible choices to answer the question, only one is correct";
+    .work as "show your work step-by-step";
+    .answer as "you pick the index of the choice that best answer the question";
+  }
+}
+```
+
+We are developing the [MCQ](../library/mcq) library of programs to illustrate thought patterns that are achievable using Structured Thoughts.
\ No newline at end of file
diff --git a/docs/roadmap.md b/docs/roadmap.md
new file mode 100644
index 0000000..8e1503d
--- /dev/null
+++ b/docs/roadmap.md
@@ -0,0 +1,30 @@
+Roadmap
+=======
+
+This is a roadmap of basic features that are needed to make AutoCog (and STA) usable.
+It only considers a few weeks' worth of work, but I rarely have brain-cycles to work on this project.
+
+Given that I am currently working alone on this project, I am not tracking work using issues and milestones.
+
+In the roadmap below, each minor version consolidates the increments of the previous ones.
+Simply put, all the `v0.4.X` releases are steps toward `v0.5`.
+These bugfix-level milestones are subject to reordering (change of priority) and shifting (introducing new milestones or actual bugfixes).
+
+| Version | Features | Notes | Tracking |
+| ------- | -------- | ----- | -------- |
+| v0.4    | Structured Thoughts | release 1st version of ST | |
+| v0.4.1  | Tests & Fixes | Testing more LLMs and fixing tokenization issues | |
+| v0.4.2  | Roadmap & Doc | Needed some organization... | |
+| v0.4.3  | Low-Level llama-cpp-python | | |
+| v0.4.4  | FTA: Simplify, Choice Limit, and Norms | | |
+| v0.4.5  | Beam Search | Implementation within FTA | |
+| v0.5    | Language Docs | Description of the language and tutorial | |
+| v0.5.1  | Tests & Fixes | Expecting that it will be needed... | |
+| v0.5.2  | Unified FTA | FTA in one "loop" using the llama-cpp-python low-level API | |
+| v0.5.3  | Elementary | Library of elementary "worksheets" (arithmetic: add/mul/div, literacy: spelling, grammar, comprehension) | |
+| v0.5.4  | MMLU-Exams | Library of MCQ solvers using different Thought Patterns | |
+| v0.5.5  | FTA to BNF | Translate FTA to llama.cpp BNF | |
+| v0.6    | Benchmarking | Evaluate speed and accuracy on Elementary and MMLU-Exams | |
+| v0.6.1  | Tooling Benchmark | | |
+| v0.7    | Finetuning | Finetune selected foundation LLMs, targeting improved performance on MMLU-Exams | |
+| v0.7.1  | Finetune Tooling | | |
diff --git a/docs/setup.md b/docs/setup.md
new file mode 100644
index 0000000..7211275
--- /dev/null
+++ b/docs/setup.md
@@ -0,0 +1,36 @@
+Setup
+=====
+
+## Install AutoCog
+
+As simple as `pip install -U git+https://github.com/LLNL/AutoCog`.
+
+But you'll probably want to clone the repository to get the library of programs:
+```
+git clone https://github.com/LLNL/AutoCog
+pip install -U ./AutoCog
+```
+
+## LLM Setup
+
+### Llama.cpp and GGUF models
+
+We download models from [TheBloke](https://huggingface.co/TheBloke) on Hugging Face.
+For example, you can download Llama 2 with 7B parameters, tuned for chat, with:
+```
+wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+```
+This is a 4-bit quantized version, aka `Q4_K_M` in the name. It is the main model we use for testing.
+
+To run GGUF models, we use a [modified version](https://github.com/tristanvdb/llama-cpp-python/tree/choice-dev) of the `llama-cpp-python` package.
+It provides Python bindings and will build `llama.cpp`.
+Our changes permit us to implement `greedy` completion (returning logprobs for all tokens).
+```
+pip install git+https://github.com/tristanvdb/llama-cpp-python@choice-dev
+```
+
+> TODO v0.5: connect to the low-level API in `llama-cpp-python` so that we can use the default release
+
+### HuggingFace Transformers
+
+> TODO v0.6: connection for the HuggingFace Transformers package (we used to have it, but it is not tested)
\ No newline at end of file
diff --git a/docs/tutorial.md b/docs/tutorial.md
new file mode 100644
index 0000000..82dd4f8
--- /dev/null
+++ b/docs/tutorial.md
@@ -0,0 +1,4 @@
+Tutorial
+========
+
+
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 0000000..87cb07b
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,71 @@
+Usage
+=====
+
+## Inside a Notebook
+
+Most of the development is done inside Python notebooks (JupyterLab).
+Eventually, several notebooks demonstrating various parts of AutoCog will be provided in the [share](../share) folder.
+To get an idea of our progress, take a look at the [WIP Notebook](../share/wip.ipynb).
+
+## Command line
+
+We are building a command line tool to use AutoCog.
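+It covers, from the shell, the same steps the notebooks perform through the `CogArch` Python API.
+As a point of reference, here is a minimal sketch of that API; the calls mirror the [WIP Notebook](../share/wip.ipynb), but the import paths are assumptions, so treat this as illustrative rather than as a documented interface:
+```
+# Minimal sketch: `loader`, `CogArch`, `load`, and the awaitable call all
+# appear in share/wip.ipynb, but the import paths below are assumed.
+from autocog import CogArch          # assumed import path
+from autocog.utility import loader   # hypothetical import path for the notebook's loader
+
+(lm, syntax) = loader(models_path='/data/models/llama-2-7b-chat.Q4_K_M.gguf')
+arch = CogArch(lm=lm, syntax=syntax)
+arch.load('mcq.cot', filepath='library/mcq/repeat-cot.sta')  # a Structured Thoughts program
+
+# Cogs run asynchronously; in a notebook you can simply `await` them:
+result = await arch('mcq.cot', topic='arithmetic',
+                    question='What is 3*4+9?',
+                    choices=['16', '21', '39', '42'])
+```
+The flags below map onto the same steps: picking a model and syntax, loading cogs, and issuing commands.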
+
+`python3 -m autocog --help`
+
+```
+usage: __main__.py [-h] [--version] [--orch ORCH] [--gguf GGUF] [--gguf-ctx GGUF_CTX] [--syntax SYNTAX] [--cogs COGS] [--command COMMAND] [--output OUTPUT] [--prefix PREFIX] [--serve] [--host HOST] [--port PORT] [--debug]
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --version            show program's version number and exit
+  --orch ORCH          Type of orchestrator: `serial` or `async`. (default: serial)
+  --gguf GGUF          Load a model from a GGUF file using llama.cpp (and llama-cpp-python) (default: None)
+  --gguf-ctx GGUF_CTX  Context size for GGUF models (default: 4096)
+  --syntax SYNTAX      One of `Llama-2-Chat`, `ChatML`, `Guanaco` or a dictionary of the kwargs to initialize a Syntax object (inlined JSON or path to a file). (default: None)
+  --cogs COGS          Files to load as cog in the architecture, prefix with its identifier else the filename is used. For example, `some/cognitive/mcq.sta` and `my.tool:some/python/tool.py` will load a Structured Thought
+                       Automaton as `mcq` and a Python file as `my.tool`. (default: None)
+  --command COMMAND    Command to be executed by the architecture as a dictionary. `__tag` identify the cog while `__entry` identify the entry point in this cog (defaults to `main`). All other field will be forwarded as
+                       keyworded args. Example: `{ "__tag" : "writer", "__entry" : "main", **kwarg }` (inlined JSON or path to a file). Can also provide one or more list of dictionary. (default: None)
+  --output OUTPUT      Directory where results are stored. (default: /home/tristan/projects/LLM/AutoCog)
+  --prefix PREFIX      String to identify this instance of AutoCog (default: autocog)
+  --serve              Whether to launch the flask server. (default: False)
+  --host HOST          Host for flask server. (default: localhost)
+  --port PORT          Port for flask server. (default: 5000)
+  --debug              Whether to run the flask server in debug mode. (default: False)
+```
+
+Some examples:
+```
+python3 -m autocog --gguf /data/models/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf --syntax Guanaco \
+    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
+    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
+```
+```
+python3 -m autocog --gguf /data/models/llama-2-7b-chat.Q4_K_M.gguf --syntax Llama-2-Chat \
+    --syntax '{ "prompt_with_format" : false, "prompt_with_index" : false, "prompt_indent" : "" }' \
+    --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
+    --cogs mmlu.select_cot:library/mmlu-exams/select-cot.sta \
+    --command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }' \
+    --command '{ "__tag" : "mmlu.select_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
+```
+
+Currently, the AutoCog application only saves the output of the commands in a JSON file.
+
+> TODO v0.5: saving the "pages"
+
+## Web Application
+
+The goal is to provide a development environment.
+In particular, we want the ability to inspect and edit/replay `frames`.
+These are created for each execution of an `Automaton` (nested when an `Automaton` calls another `Automaton`).
+Upon ending, the execution trace of the `Automaton` is saved in the corresponding frame.
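+The nesting is easiest to picture as a tree of frames.
+The sketch below is purely illustrative: the field names are hypothetical and do not describe the actual internal layout.
+```
+# Hypothetical shape of nested frames, for intuition only.
+frame = {
+    'automaton' : 'mmlu.repeat_cot',   # the Automaton this execution ran
+    'trace'     : [ ... ],             # execution trace, saved when the run ends
+    'children'  : [ ... ],             # one frame per nested Automaton call
+}
+```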
+
+Eventually, we want to use these traces for two purposes:
+ - replay: edit part of the trace, then restart the program from that point
+ - finetuning: select "successful" frames to finetune models
+
+Run the command below at the root of the repository to launch a server. It uses [quart](http://pgjones.gitlab.io/quart).
+```
+python3 -m autocog --serve --host 0.0.0.0 --port 5000 --cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta
+```
diff --git a/share/wip.ipynb b/share/wip.ipynb
index d2efa68..0bfdef9 100644
--- a/share/wip.ipynb
+++ b/share/wip.ipynb
@@ -37,16 +37,18 @@
 "model_names = [\n",
 "    'llama-2-7b.Q4_K_M', # wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf\n",
 "    'llama-2-7b-chat.Q4_K_M', # wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf\n",
+    "    'llama-2-13b-chat.Q4_K_M', # wget https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf\n",
+    "    'llama-2-13b-chat.Q5_K_M', # wget https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf\n",
 "    'llama-2-13b-chat.Q8_0', # wget https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q8_0.gguf\n",
 "    'tinyllama-1.1b-chat-v0.3.Q4_K_M', # wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf\n",
 "    'tinyllama-2-1b-miniguanaco.Q4_K_M', # wget https://huggingface.co/TheBloke/Tinyllama-2-1b-miniguanaco-GGUF/resolve/main/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf\n",
 "    'capybarahermes-2.5-mistral-7b.Q4_K_M' # wget https://huggingface.co/TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF/resolve/main/capybarahermes-2.5-mistral-7b.Q4_K_M.gguf\n",
 "]\n",
 "(lm,syntax) = loader(\n",
-    "    models_path=f\"/data/models/{model_names[2]}.gguf\",\n",
+    "    models_path=f\"/data/models/{model_names[1]}.gguf\",\n",
 "    prompt_with_format=False,\n",
 "    prompt_with_index=False,\n",
-    "#    prompt_indent='> '\n",
+    "    prompt_indent='  '\n",
 ")"
 ]
 },
@@ -60,16 +62,16 @@
 "arch = CogArch(lm=lm, syntax=syntax)\n",
 "\n",
 "mmlu_cogs = [\n",
-    "    arch.load('mmlu-repeat', filepath=f\"{autocog_home}/library/mmlu-exams/repeat.sta\"),\n",
-    "    arch.load('mmlu-repeat-cot', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-cot.sta\"),\n",
-    "    arch.load('mmlu-repeat-hyp', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-hyp.sta\"),\n",
-    "    arch.load('mmlu-repeat-iter', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-iter.sta\"),\n",
-    "    arch.load('mmlu-repeat-annot', filepath=f\"{autocog_home}/library/mmlu-exams/repeat-annot.sta\"),\n",
-    "    arch.load('mmlu-select', filepath=f\"{autocog_home}/library/mmlu-exams/select.sta\"),\n",
-    "    arch.load('mmlu-select-cot', filepath=f\"{autocog_home}/library/mmlu-exams/select-cot.sta\"),\n",
-    "    arch.load('mmlu-select-hyp', filepath=f\"{autocog_home}/library/mmlu-exams/select-hyp.sta\"),\n",
-    "    arch.load('mmlu-select-iter', filepath=f\"{autocog_home}/library/mmlu-exams/select-iter.sta\"),\n",
-    "    arch.load('mmlu-select-annot', filepath=f\"{autocog_home}/library/mmlu-exams/select-annot.sta\")\n",
+    "    arch.load('mcq.repeat.base', filepath=f\"@mcq/repeat.sta\"),\n",
+    "    arch.load('mcq.repeat.cot', filepath=f\"@mcq/repeat-cot.sta\"),\n",
+    "    arch.load('mcq.repeat.hyp', filepath=f\"@mcq/repeat-hyp.sta\"),\n",
+    "    arch.load('mcq.repeat.iter', filepath=f\"@mcq/repeat-iter.sta\"),\n",
+    "    arch.load('mcq.repeat.annot', filepath=f\"@mcq/repeat-annot.sta\"),\n",
+    "    arch.load('mcq.select.base', 
filepath=f\"@mcq/select.sta\"),\n", + " arch.load('mcq.select.cot', filepath=f\"@mcq/select-cot.sta\"),\n", + " arch.load('mcq.select.hyp', filepath=f\"@mcq/select-hyp.sta\"),\n", + " arch.load('mcq.select.iter', filepath=f\"@mcq/select-iter.sta\"),\n", + " arch.load('mcq.select.annot', filepath=f\"@mcq/select-annot.sta\")\n", "]\n", "\n", "mmlu_data = [\n", @@ -81,8 +83,8 @@ "]\n", "\n", "arith_cogs = [\n", - " arch.load('arithmetic-multiply-single', filepath=f\"{autocog_home}/library/arithmetic/multiply-single.sta\"),\n", - " arch.load('arithmetic-multiply-chain', filepath=f\"{autocog_home}/library/arithmetic/multiply-chain.sta\")\n", + " arch.load('elementary.multiply.single', filepath=f\"@elementary/multiply-single.sta\"),\n", + " arch.load('elementary.multiply.chain', filepath=f\"@elementary/multiply-chain.sta\")\n", "]\n", "\n", "arith_data = [\n", @@ -114,7 +116,7 @@ { "data": { "text/plain": [ - "['39', '21', '42', '21', '16', 3, 2, 3, 3, 2]" + "['42', '21', '42', '16', '42', 2, 2, 2, 2, 2]" ] }, "execution_count": 5, @@ -124,9 +126,10 @@ ], "source": [ "tags = [\n", - " 'mmlu-repeat', 'mmlu-repeat-cot', 'mmlu-repeat-hyp', 'mmlu-repeat-iter', 'mmlu-repeat-annot',\n", - " 'mmlu-select', 'mmlu-select-cot', 'mmlu-select-hyp', 'mmlu-select-iter', 'mmlu-select-annot'\n", + " 'mcq.repeat.base', 'mcq.repeat.cot', 'mcq.repeat.hyp', 'mcq.repeat.iter', 'mcq.repeat.annot',\n", + " 'mcq.select.base', 'mcq.select.cot', 'mcq.select.hyp', 'mcq.select.iter', 'mcq.select.annot'\n", "]\n", + "# tags = [ 'mmlu-repeat-cot', 'mmlu-select-cot' ]\n", "results = [ await arch(tag, **data) for tag in tags for data in mmlu_data ]\n", "# arith_results = [ await cog(**data) for cog in arith_cogs for data in arith_data ]\n", "results" @@ -134,7 +137,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "id": "4fd250b0-58d1-4d09-8a8d-d860af8a8493", "metadata": {}, "outputs": [ @@ -142,7 +145,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "p=0.5946437829906105\n", + "p=0.6638551711390864\n", "---------------------\n", "\n", "[INST] <>\n", @@ -152,11 +155,11 @@ "You are using the following syntax:\n", "```\n", "start:\n", - "> topic(text(20)): the general category from which the question was taken\n", - "> question(text(50)): the question that you have to answer\n", - "> choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", - "> work(thought)[1:10]: show your work step-by-step\n", - "> answer(repeat(choices)): you repeat verbatim the choice that best answer the question\n", + " topic(text(20)): the general category from which the question was taken\n", + " question(text(50)): the question that you have to answer\n", + " choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", + " work(thought)[1:10]: show your work step-by-step\n", + " answer(select(choices)): you pick the index of the choice that best answer the question\n", "next: select which of return will be the next step.\n", "```\n", "It includes the folowing named formats:\n", @@ -164,19 +167,21 @@ " - a short text representing a single thought, it does not have to be a proper sentence.\n", "[/INST]\n", "start:\n", - "> topic: arithmetic\n", - "> question: What is 3*4+9?\n", - "> choices: 16\n", - "> choices: 21\n", - "> choices: 39\n", - "> choices: 42\n", - "> work: \n", - "> answer: 42\n", - "next: return\n", + " topic: arithmetic\n", + " question: What is 3*4+9?\n", + " choices: 16\n", + " choices: 21\n", + " choices: 39\n", + " 
choices: 42\n", + " work:\n", + " work: 3*4 = 12\n", + " work: 12 + 9 = 21\n", + " answer: 3\n", + " next: return\n", "\n", "============================\n", "\n", - "p=0.6176012829154562\n", + "p=0.7895269044180848\n", "---------------------\n", "\n", "[INST] <>\n", @@ -186,11 +191,11 @@ "You are using the following syntax:\n", "```\n", "start:\n", - "> topic(text(20)): the general category from which the question was taken\n", - "> question(text(50)): the question that you have to answer\n", - "> choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", - "> work(thought)[1:10]: show your work step-by-step\n", - "> answer(repeat(choices)): you repeat verbatim the choice that best answer the question\n", + " topic(text(20)): the general category from which the question was taken\n", + " question(text(50)): the question that you have to answer\n", + " choices(text(40))[4]: the four possible choices to answer the question, only one is correct\n", + " work(thought)[1:10]: show your work step-by-step\n", + " answer(select(choices)): you pick the index of the choice that best answer the question\n", "next: select which of return will be the next step.\n", "```\n", "It includes the folowing named formats:\n", @@ -198,17 +203,17 @@ " - a short text representing a single thought, it does not have to be a proper sentence.\n", "[/INST]\n", "start:\n", - "> topic: arithmetic\n", - "> question: What is 3*4+9?\n", - "> choices: 16\n", - "> choices: 21\n", - "> choices: 39\n", - "> choices: 42\n", - "> work: \n", - "> work: 3*4 = 12\n", - "> work: 12 + 9 = 21\n", - "> answer: 21\n", - "next: return\n", + " topic: arithmetic\n", + " question: What is 3*4+9?\n", + " choices: 16\n", + " choices: 21\n", + " choices: 39\n", + " choices: 42\n", + " work:\n", + " work: 3*4 = 12\n", + " work: 12 + 9 = 21\n", + " answer: 2\n", + " next: return\n", "\n", "============================\n", "\n" @@ -216,185 +221,81 @@ } ], "source": [ + "# TODO inspect function...\n", "import numpy\n", "scoring = lambda probas: numpy.power(numpy.prod(probas), 1./len(probas))\n", - "texts = arch.orchestrator.pages[2].ftts['main'][-1].results(lm)\n", + "texts = arch.orchestrator.pages[-8].ftts['main'][-1].results(lm)\n", "for text in texts[-2:]:\n", " print(f\"p={text[1]}\\n---------------------\\n\\n{text[0]}\\n\\n============================\\n\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "dd5ec2ff-e157-4e82-bba6-4c5b11f52aea", "metadata": {}, "outputs": [], "source": [ - "arch.orchestrator.pages[-1].ftts['main'][-1].toGraphViz(lm)" + "# arch.orchestrator.pages[-1].ftts['main'][-1].toGraphViz(lm)" ] }, { "cell_type": "raw", - "id": "057d8ed3-6f73-48aa-ba54-4dea2adfe325", + "id": "0d79deeb-6d55-4223-b77c-20064340e49d", "metadata": { "tags": [] }, "source": [ - "# source = \"\"\"\n", - "# format bool {\n", - "# is enum(\"true\",\"false\");\n", - "# }\n", - "# prompt main {\n", - "# is {\n", - "# A is bool;\n", - "# B[0:2] is {\n", - "# C is bool;\n", - "# D[2:4] is bool;\n", - "# }\n", - "# E[0:2] is {\n", - "# F is bool;\n", - "# }\n", - "# G is {\n", - "# H is bool;\n", - "# I is bool;\n", - "# }\n", - "# J is bool;\n", - "# }\n", - "# channel {\n", - "# to .B.C from ?inC;\n", - "# to .E from ?inE;\n", - "# to .G from ?inG;\n", - "# }\n", - "# }\"\"\"\n", - "# samples = [\n", - "# (\n", - "# {},\n", - "# {\n", - "# \"inC\" : [ 'true', 'false' ],\n", - "# \"inE\" : [ { 'F' : 'true' } ],\n", - "# \"inG\" : { 'H' : 'true' }\n", - "# }\n", - "# ),\n", - 
"# (\n", - "# {},\n", - "# {\n", - "# \"inC\" : [ 'true' ],\n", - "# \"inE\" : [],\n", - "# \"inG\" : { 'H' : 'true' }\n", - "# }\n", - "# )\n", - "# ]\n", + "# This was a stress test for compilation phases (after frontend):\n", + "# - IR -> abstract (compile-time)\n", + "# - abstract -> concrete (compile-time)\n", + "# - concrete -> finite (runtime: need dataflow)\n", + "# Need to be moved to a test.\n", "\n", "source = \"\"\"\n", + "format bool {\n", + " is enum(\"true\",\"false\");\n", + "}\n", "prompt main {\n", " is {\n", - " topic is text<20>;\n", - " question is text<50>;\n", - " choices[4] is {\n", - " value is text<40>;\n", - " correct is enum(\"yes\",\"no\");\n", + " A is bool;\n", + " B[0:2] is {\n", + " C is bool;\n", + " D[2:4] is bool;\n", " }\n", - " answer is repeat(.choices.value);\n", - " }\n", - " channel {\n", - " to .topic from ?topic;\n", - " to .question from ?question;\n", - " to .choices.value from ?choices;\n", - " }\n", - " return {\n", - " from .answer;\n", - " }\n", - " annotate {\n", - " _ as \"You are answering a multiple choice questionnaire.\";\n", - " .topic as \"the general category from which the question was taken\";\n", - " .question as \"the question that you have to answer\";\n", - " .choices as \"you judge whether each choice is correct or not\";\n", - " .choices.value as \"the value of the choice\";\n", - " .choices.correct as \"you decide whether this choice is correct or not\";\n", - " .answer as \"you repeat the value of the choice that best answer the question\";\n", - " }\n", - "}\"\"\"\n", - "samples = [\n", - " (\n", - " {},\n", - " {\n", - " \"topic\" : \"the topic of the question\",\n", - " \"question\" : \"an exmaple question to show how it compiles\",\n", - " \"choices\" : [\n", - " \"the first choice\",\n", - " \"the 2nd choice\",\n", - " \"yet another 3rd choice\",\n", - " \"final 4th choice\"\n", - " ]\n", + " E[0:2] is {\n", + " F is bool;\n", " }\n", - " )\n", - "]\n", - "\n", - "source = \"\"\"\n", - "prompt main {\n", - " is {\n", - " choices[4] is text<40>;\n", - " thought[1:3] is text<5>;\n", - " answer is repeat(.choices);\n", + " G is {\n", + " H is bool;\n", + " I is bool;\n", + " }\n", + " J is bool;\n", " }\n", " channel {\n", - " to .choices from ?choices;\n", - " }\n", - " return {\n", - " from .answer;\n", - " }\n", - " annotate {\n", - " _ as \"You are answering a multiple choice questionnaire.\";\n", - " .choices as \"possible choices\";\n", - " .thought as \"think about the choices\";\n", - " .answer as \"repeat the correct choice\";\n", + " to .B.C from ?inC;\n", + " to .E from ?inE;\n", + " to .G from ?inG;\n", " }\n", "}\"\"\"\n", "samples = [\n", " (\n", - " {},\n", - " {\n", - " \"choices\" : [\n", - " \"first\",\n", - " \"second\",\n", - " \"third\",\n", - " \"fourth\"\n", - " ]\n", - " }\n", + " {},\n", + " {\n", + " \"inC\" : [ 'true', 'false' ],\n", + " \"inE\" : [ { 'F' : 'true' } ],\n", + " \"inG\" : { 'H' : 'true' }\n", + " }\n", + " ),\n", + " (\n", + " {},\n", + " {\n", + " \"inC\" : [ 'true' ],\n", + " \"inE\" : [],\n", + " \"inG\" : { 'H' : 'true' }\n", + " }\n", " )\n", - "]\n", - "\n", - "# source = \"\"\"\n", - "# prompt main {\n", - "# is {\n", - "# choices[4] is text<40>;\n", - "# answer is repeat(.choices);\n", - "# }\n", - "# channel {\n", - "# to .choices from ?choices;\n", - "# }\n", - "# return {\n", - "# from .answer;\n", - "# }\n", - "# annotate {\n", - "# _ as \"You are answering a multiple choice questionnaire.\";\n", - "# .choices as \"possible choices\";\n", - "# .answer as \"repeat the 
correct choice\";\n", - "# }\n", - "# }\"\"\"\n", - "# samples = [\n", - "# (\n", - "# {},\n", - "# {\n", - "# \"choices\" : [\n", - "# \"first\",\n", - "# \"second\",\n", - "# \"third\",\n", - "# \"fourth\"\n", - "# ]\n", - "# }\n", - "# )\n", - "# ]" + "]" ] } ], diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..ef043bf --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +**/*-results.json