Version v0.4
This commit also includes:
 - tested 5 different models requiring different syntaxes
 - switches to edit the syntax
 - improved new CLI
 - model loader
 - test CLI and basic language execution in the same script
 - reworked GitHub testing
tristanvdb committed Mar 2, 2024
1 parent 1501ef7 commit 6216bed
Showing 18 changed files with 292 additions and 911 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/dataflow.yml → .github/workflows/cli.yml
@@ -1,4 +1,4 @@
name: dataflow
name: cli

on:
  push:
@@ -26,4 +26,4 @@ jobs:
      - name: Install
        run: pip install .
      - name: Check
        run: cd tests/unittests ; python -m dataflow
        run: ./tests/cli-mmlu.sh
2 changes: 1 addition & 1 deletion .github/workflows/frontend.yml
@@ -26,4 +26,4 @@ jobs:
      - name: Install
        run: pip install .
      - name: Check
        run: cd tests/language ; python -m frontend
        run: python tests/frontend.py
40 changes: 37 additions & 3 deletions README.md
@@ -22,15 +22,41 @@ git clone https://github.com/LLNL/AutoCog
pip install -U ./AutoCog
```

### LLM Setup

#### LLama.cpp and GGUF models

We download models from [TheBloke](https://huggingface.co/TheBloke) on Hugging Face.
For example, you can download Llama 2 with 7B parameters, tuned for chat, with:
```
wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
```
This is a 4-bit quantized version, identified by `Q4_K_M` in the filename. It is the main model we use for testing.

To run GGUF models, we use a [modified version](https://github.com/tristanvdb/llama-cpp-python/tree/choice-dev) of the `llama-cpp-python` package.
It provides Python bindings and builds `llama.cpp`.
Our changes let us implement `greedy` completion (returning the log-probability of all tokens).
```
pip install git+https://github.com/tristanvdb/llama-cpp-python@choice-dev
```
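As a rough sketch of what the fork enables (hedged: `full_logprobs` is the fork's extension, and the exact output layout is an assumption):
```
from llama_cpp import Llama

# logits_all=True keeps the logits needed to score alternative tokens.
llm = Llama(model_path="llama-2-7b-chat.Q4_K_M.gguf", logits_all=True)

# With the fork, `logprobs=-1` together with `full_logprobs=True` requests
# log-probabilities over the entire vocabulary for the next token,
# not just the usual top-k alternatives.
out = llm.create_completion("2+2=", max_tokens=1, logprobs=-1, full_logprobs=True)
```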

> TODO v0.5: connect to the low-level API in `llama-cpp-python` so that we can use the default release

#### HuggingFace Transformers

> TODO v0.6: connect to the HuggingFace Transformers package (we used to support it, but it has not been tested recently)

### Inside a Notebook

Most of the development is done inside Python notebooks (JupyterLab).
To get started, take a look at the [Demo](./demo.ipynb).
Eventually, several notebooks demonstrating various parts of AutoCog will be provided in the [share](./share) folder.
To get an idea of our progress, take a look at the [WIP Notebook](./share/wip.ipynb).

### Command line

`python -m autocog --help`
We are building a command line tool to use AutoCog.

`python3 -m autocog --help`

```
usage: __main__.py [-h] [--version] [--orch ORCH] [--gguf GGUF] [--gguf-ctx GGUF_CTX] [--syntax SYNTAX] [--cogs COGS] [--command COMMAND] [--output OUTPUT] [--prefix PREFIX] [--serve] [--host HOST] [--port PORT] [--debug]
@@ -54,12 +80,20 @@ optional arguments:
--debug Whether to run the flask server in debug mode. (default: False)
```

For example, we run commands using:
Some examples:
```
python3 -m autocog --gguf /data/models/tinyllama-2-1b-miniguanaco.Q4_K_M.gguf --syntax Guanaco \
--cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
--command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
```
```
python3 -m autocog --gguf /data/models/llama-2-7b-chat.Q4_K_M.gguf --syntax Llama-2-Chat \
--syntax '{ "prompt_with_format" : false, "prompt_with_index" : false, "prompt_indent" : "" }' \
--cogs mmlu.repeat_cot:library/mmlu-exams/repeat-cot.sta \
--cogs mmlu.select_cot:library/mmlu-exams/select-cot.sta \
--command '{ "__tag" : "mmlu.repeat_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }' \
--command '{ "__tag" : "mmlu.select_cot", "topic" : "arithmetic", "question" : "What is 3*4+9?", "choices" : [ "16", "21", "39", "42" ] }'
```

### Web Application

27 changes: 21 additions & 6 deletions autocog/arch/architecture.py
@@ -52,17 +52,32 @@ def load(self, tag:str, filepath:Optional[str]=None, program:Optional[str]=None,
raise Exception(f"Unrecognized file language: {language}")
self.register(cog)
return cog

async def __call__(self, __tag:str, __entry:str='main', **inputs):

def get_single_cog(self):
if len(self.orchestrator.cogs) == 0:
raise Exception('No Cogs registered when one is expected')
if len(self.orchestrator.cogs) > 1:
raise Exception('More than one Cogs registered when only one is expected')
return list(self.orchestrator.cogs.keys())[0]

async def __call__(self, __tag:Optional[str]=None, __entry:str='main', **inputs):
if __tag is None:
__tag = self.get_single_cog()
return (await self.orchestrator.execute(jobs=[ (__tag,__entry,inputs) ], parent=0, progress=False))[0]

async def run(self, commands, progress:bool=True):
jobs = []
for cmd in commands:
tag = cmd['__tag']
del cmd['__tag']
entry = cmd['__entry']
del cmd['__entry']
if '__tag' in cmd:
tag = cmd['__tag']
del cmd['__tag']
else:
tag = self.get_single_cog()
if '__entry' in cmd:
entry = cmd['__entry']
del cmd['__entry']
else:
entry = 'main'
jobs.append( (tag,entry,cmd) )
return await self.orchestrator.execute(jobs=jobs, parent=0, progress=progress)

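A hedged usage sketch of the new defaults (inputs are hypothetical): with exactly one cog registered, `__tag` falls back to that cog and `__entry` to `main`.
```
# Inside an async context; `arch` has a single cog loaded.
result = await arch(topic="arithmetic", question="What is 3*4+9?",
                    choices=["16", "21", "39", "42"])

# run() applies the same defaults to each command dictionary.
results = await arch.run([{"question": "What is 3*4+9?",
                           "choices": ["16", "21", "39", "42"]}])
```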
2 changes: 1 addition & 1 deletion autocog/fta/actions.py
@@ -44,7 +44,7 @@ def __init__(self, uid:str, text:str, successors: List[str]=[]):
        super().__init__(uid=uid, successors=successors, text=text)

    def prepare(self, lm):
        self.tokens.extend(lm.tokenize(self.text, whole=False))
        self.tokens.extend(lm.tokenize(self.text, whole=self.uid == 'root'))

    def step(self, lm, prompt:List[Token], step:int, min_branch:int, max_branch:int, tok_clip:float) -> Dict[Token,float]:
        return { self.tokens[step] : 1. } if step < len(self.tokens) else {}
5 changes: 4 additions & 1 deletion autocog/fta/automaton.py
@@ -12,7 +12,10 @@

def depthfirst(tree):
    yield tree
    for c in tree.children.values():
    children = tree.children
    if isinstance(children, dict):
        children = children.values()
    for c in children:
        c.parent = tree
        yield from c.depthfirst()

5 changes: 4 additions & 1 deletion autocog/lm/llama.py
@@ -43,7 +43,10 @@ def detokenize(self, tokens:List[int], whole:bool=True) -> str:
        if not whole:
            tokens = [ self.model.token_nl() ] + tokens
        tokens = [ self.model.token_bos() ] + tokens + [ self.model.token_eos() ]
        return self.model.detokenize(tokens).decode("utf-8", errors="ignore")
        text = self.model.detokenize(tokens).decode("utf-8", errors="ignore")
        if text.endswith('<|im_end|>'):
            text = text[:-len('<|im_end|>')]
        return text

    def impl_greedy(self, prompt: Union[str,List[int]]) -> List[float]:
        output = self.model.create_completion(prompt, max_tokens=1, logprobs=-1, full_logprobs=True)
14 changes: 11 additions & 3 deletions autocog/sta/automaton.py
@@ -113,9 +113,17 @@ def coords(self):
    def prompt(self, syntax):
        field = self.abstract.field
        indent = syntax.prompt_indent * len(self.indices)
        fmt = '(record)' if field.is_record() else '(' + field.format.label() + ')'
        idx = f'[{self.indices[-1]}]' if field.is_list() else ''
        prompt = indent + self.name() + fmt + idx + ':'
        prompt = indent + self.name()
        if syntax.prompt_with_format:
            fmt = '(record)' if field.is_record() else '(' + field.format.label() + ')'
            prompt += fmt
        if syntax.prompt_with_index:
            idx = self.indices[-1]
            if not syntax.prompt_zero_index:
                idx += 1
            idx = f'[{idx}]' if field.is_list() else ''
            prompt += idx
        prompt += ':'
        if not field.is_record():
            prompt += ' '
        return prompt
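For illustration, a hedged sketch of the rendered prompt under the new switches (the field name is hypothetical):
```
# Defaults (prompt_with_format=True, prompt_with_index=True, prompt_zero_index=False)
# render a list field of text with one-based indices:
#   choices(text)[1]:
# With both switches off (as in the Llama-2-Chat example in the README):
#   choices:
```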
4 changes: 4 additions & 0 deletions autocog/sta/syntax.py
@@ -46,6 +46,10 @@ class Syntax(BaseModel):
    header_mid: str = '\n'
    header_post: str = '\n'

    prompt_with_format: bool = True
    prompt_with_index: bool = True
    prompt_zero_index: bool = False

    @staticmethod
    def Llama2Chat(**kwargs):
        kwargs.update(syntax_kwargs['Llama-2-Chat'])
23 changes: 11 additions & 12 deletions autocog/utility/args2arch.py
@@ -9,7 +9,7 @@
from ..arch.architecture import CognitiveArchitecture as CogArch
from ..arch.orchestrator import Serial, Async

from ..sta.syntax import Syntax
from ..sta.syntax import Syntax, syntax_kwargs

def argparser():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -19,7 +19,7 @@ def argparser():

    parser.add_argument('--gguf', help="""Load a model from a GGUF file using llama.cpp (and llama-cpp-python)""", default=None)
    parser.add_argument('--gguf-ctx', help="""Context size for GGUF models""", default=4096)
    parser.add_argument('--syntax', help="""One of `Llama-2-Chat`, `ChatML`, `Guanaco` or a dictionary of the kwargs to initialize a Syntax object (inlined JSON or path to a file).""", default=None)
    parser.add_argument('--syntax', help=f"""One of `{'`, `'.join(syntax_kwargs.keys())}` or a dictionary of the kwargs to initialize a Syntax object (inlined JSON or path to a file). If used more than once, only the first can be a string; the following ones must be dictionaries, and later values override earlier ones.""", default=None, action='append')
    parser.add_argument('--cogs', help="""Files to load as cogs in the architecture, prefixed with an identifier, else the filename is used. For example, `some/cognitive/mcq.sta` and `my.tool:some/python/tool.py` will load a Structured Thought Automaton as `mcq` and a Python file as `my.tool`.""", action='append')

    parser.add_argument('--command', help="""Command to be executed by the architecture as a dictionary. `__tag` identifies the cog while `__entry` identifies the entry point in this cog (defaults to `main`). All other fields are forwarded as keyword args. Example: `{ "__tag" : "writer", "__entry" : "main", **kwarg }` (inlined JSON or path to a file). Can also provide one or more lists of dictionaries.""", action='append')
@@ -58,16 +58,15 @@ def parseargs(argv):
        from autocog.lm import RLM
        lm = RLM()

    if args.syntax == 'Llama-2-Chat':
        syntax = Syntax.Llama2Chat()
    elif args.syntax == 'ChatML':
        syntax = Syntax.ChatML()
    elif args.syntax == 'Guanaco':
        syntax = Syntax.Guanaco()
    elif args.syntax is not None:
        syntax = Syntax(**parse_json(args.syntax))
    else:
        syntax = Syntax()
    syntax = {}
    if args.syntax:
        if args.syntax[0] in syntax_kwargs:
            syntax.update(syntax_kwargs[args.syntax[0]])
        else:
            syntax.update(parse_json(args.syntax[0]))
        for s in args.syntax[1:]:
            syntax.update(parse_json(s))
    syntax = Syntax(**syntax)

    arch = CogArch(Orch=Orch, lm=lm, syntax=syntax)

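Conceptually, repeated `--syntax` flags reduce to a left-to-right dictionary merge (the values below are hypothetical):
```
syntax = {}
syntax.update(syntax_kwargs['Llama-2-Chat'])      # first flag: a named preset
syntax.update({'prompt_with_format': False})      # later flags: JSON overrides
syntax = Syntax(**syntax)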
35 changes: 35 additions & 0 deletions autocog/utility/models.py
@@ -0,0 +1,35 @@

from ..sta.syntax import Syntax, syntax_kwargs as SyntaxKwargs
from ..lm import RLM
from ..lm import Llama

def loader(models_path=None, syntax=None, n_ctx=4096, **syntax_kwargs):
    if models_path is None:
        lm = RLM()
        syntax = Syntax(**syntax_kwargs)
    else:
        lm = Llama(model_path=models_path, n_ctx=n_ctx)

        if syntax is None:
            # TODO does llama.cpp (or GGUF) contain that info?
            model_name = models_path.split('/')[-1]
            if model_name.find('llama-2') >= 0 and model_name.find('chat') >= 0:
                syntax = 'Llama-2-Chat'
            elif model_name.find('capybarahermes') >= 0:
                syntax = 'ChatML'
            elif model_name.find('tinyllama') >= 0 and model_name.find('chat') >= 0:
                syntax = 'ChatML'
            elif model_name.find('tinyllama') >= 0 and model_name.find('miniguanaco') >= 0:
                syntax = 'Guanaco'
        else:
            assert syntax in SyntaxKwargs

        if syntax is None:
            syntax = syntax_kwargs
        else:
            syntax = dict(SyntaxKwargs[syntax])
            syntax.update(syntax_kwargs)

        syntax = Syntax(**syntax)

    return (lm,syntax)
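A hedged usage sketch (the model path is hypothetical; detection relies on the filename patterns above):
```
from autocog.utility.models import loader

# The filename contains 'llama-2' and 'chat', so the 'Llama-2-Chat' syntax is inferred.
lm, syntax = loader(models_path='/data/models/llama-2-7b-chat.Q4_K_M.gguf')

# Extra keyword arguments override individual Syntax fields.
lm, syntax = loader(models_path='/data/models/llama-2-7b-chat.Q4_K_M.gguf',
                    prompt_with_format=False, prompt_with_index=False)
```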