From 7c126314d020fb4430495c948fd593b3bd8815fe Mon Sep 17 00:00:00 2001 From: Aalanli <73255774+Aalanli@users.noreply.github.com> Date: Tue, 26 Sep 2023 23:03:21 -0400 Subject: [PATCH] [Config] Add configuration file (#359) Auto generates the following config file at ~/.config/hidet/hidet.toml, which the user can then edit to customize the config. Setting `hidet.option....` in a script does not change this file. ```toml # The (warmup, number, repeat) parameters for benchmarking. The benchmarking will run warmup + number * repeat times. bench_config = [3, 10, 3] # The search space level. # choices: [0, 1, 2] search_space = 0 # Whether to enable operator cache on disk. # choices: [True, False] cache_operator = true # The directory to store the cache. cache_dir = "/home/allan/Programs/hidet_repo/hidet/.hidet_cache" # Whether to build operators in parallel. # choices: [True, False] parallel_build = false # The pair (max_parallel_jobs, mem_gb_per_job) that describe the maximum number of parallel jobs and memory reserved for each job parallel_tune = [-1, 1.5] # Whether to save the IR when lower an IRModule to the operator cache. # choices: [True, False] save_lower_ir = false # Whether to cache the generated kernels during tuning. # choices: [True, False] debug_cache_tuning = false # Whether to show the variable id in the IR. # choices: [True, False] debug_show_var_id = false # Whether to check shapes of compiled graph and tasks during execution. # choices: [True, False] runtime_check = true # Whether to show the verbose flow graph. # choices: [True, False] debug_show_verbose_flow_graph = false # The address of the compile server. Can be an IP address or a domain name. "compile_server.addr" = "localhost" # The port of the compile server. "compile_server.port" = 8329 # Whether to enable the compile server. # choices: [True, False] "compile_server.enabled" = false # The user name to access the compile server. 
"compile_server.username" = "admin" # The password to access the compile server. "compile_server.password" = "admin_password" # The URL of the repository that the remote server will use. "compile_server.repo_url" = "https://github.com/hidet-org/hidet" # The version (e.g., branch, commit, or tag) that the remote server will use. "compile_server.repo_version" = "main" # The CUDA architecture to compile the kernels for (e.g., "sm_70"). "auto" for auto-detect. "cuda.arch" = "auto" ``` --------- Co-authored-by: Allan Lin --- python/hidet/option.py | 101 +++++++++++++++++++++++++++++++++++++---- requirements.txt | 5 +- setup.py | 3 +- 3 files changed, 99 insertions(+), 10 deletions(-) diff --git a/python/hidet/option.py b/python/hidet/option.py index 473bebdd4..fbfc80dba 100644 --- a/python/hidet/option.py +++ b/python/hidet/option.py @@ -10,8 +10,9 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import annotations -from typing import Dict, Any, List, Optional, Callable, Iterable, Tuple +from typing import Dict, Any, List, Optional, Callable, Iterable, Tuple, Union import os +import tomlkit class OptionRegistry: @@ -36,6 +37,81 @@ def __init__( self.checker = checker +def create_toml_doc() -> tomlkit.TOMLDocument: + def nest_flattened_dict(d: Dict[str, Any]) -> Dict[str, Any]: + new_dict = {} + for k, v in d.items(): + if '.' 
in k: + prefix, suffix = k.split('.', 1) + if prefix not in new_dict: + new_dict[prefix] = {suffix: v} + else: + new_dict[prefix][suffix] = v + else: + new_dict[k] = v + for k, v in new_dict.items(): + if isinstance(v, dict): + new_dict[k] = nest_flattened_dict(v) + return new_dict + + def gen_doc(d: Dict[str, Any], toml_doc: tomlkit.TOMLDocument): + for k, v in d.items(): + if isinstance(v, dict): + table = tomlkit.table() + gen_doc(v, table) + toml_doc.add(k, table) + elif isinstance(v, OptionRegistry): + toml_doc.add(tomlkit.comment(v.description)) + if v.choices is not None: + toml_doc.add(tomlkit.comment(f' choices: {v.choices}')) + if isinstance(v.default_value, (bool, int, float, str)): + toml_doc.add(k, v.default_value) + elif isinstance(v.default_value, Tuple): + # represent tuples as toml arrays, do not allow python lists as default values to avoid ambiguity + val = list(v.default_value) + arr = tomlkit.array() + arr.extend(val) + toml_doc.add(k, arr) + else: + raise ValueError(f'Invalid type of default value for option {k}: {type(v.default_value)}') + toml_doc.add(tomlkit.nl()) + else: + raise ValueError(f'Invalid type of default value for option {k}: {type(v)}') + + fd = nest_flattened_dict(OptionRegistry.registered_options) + doc = tomlkit.document() + gen_doc(fd, doc) + return doc + + +def _load_config(config_file_path: str): + def collapse_nested_dict(d: Dict[str, Any]) -> Dict[str, Union[str, int, float, bool, Tuple]]: + # {"cuda": {"arch": "hopper", "cc": [9, 0]}} -> {"cuda.arch": "hopper", "cuda.cc": (9, 0)} + ret = {} + for k, v in d.items(): + if isinstance(v, dict): + v = collapse_nested_dict(v) + for k1, v1 in v.items(): + ret[f'{k}.{k1}'] = v1 + continue + if isinstance(v, list): + v = tuple(v) + ret[k] = v + return ret + + with open(config_file_path, 'r') as f: + config_doc = tomlkit.parse(f.read()) + for k, v in collapse_nested_dict(config_doc).items(): + if k not in OptionRegistry.registered_options: + raise KeyError(f'Option {k} found in
config file {config_file_path} is not registered.') + OptionRegistry.registered_options[k].default_value = v + + +def _write_default_config(config_file_path: str, config_doc: tomlkit.TOMLDocument): + with open(config_file_path, 'w') as f: + tomlkit.dump(config_doc, f) + + def register_option( name: str, type_hint: str, @@ -177,11 +253,20 @@ def register_hidet_options(): ) register_option( name='cuda.arch', - type_hint='Optional[str]', - default_value=None, - description='The CUDA architecture to compile the kernels for (e.g., "sm_70"). None for auto-detect.', + type_hint='str', + default_value='auto', + description='The CUDA architecture to compile the kernels for (e.g., "sm_70"). "auto" for auto-detect.', ) + config_file_path = os.path.join(os.path.expanduser('~'), '.config', 'hidet') + if not os.path.exists(config_file_path): + os.makedirs(config_file_path) + config_file_path = os.path.join(config_file_path, 'hidet.toml') + if not os.path.exists(config_file_path): + _write_default_config(config_file_path, create_toml_doc()) + else: + _load_config(config_file_path) + register_hidet_options() @@ -662,15 +747,15 @@ def debug_show_verbose_flow_graph(enable: bool = True): class cuda: @staticmethod - def arch(arch: Optional[str] = None): + def arch(arch: str = 'auto'): """ Set the CUDA architecture to use when building CUDA kernels. Parameters ---------- arch: Optional[str] - The CUDA architecture, e.g., 'sm_35', 'sm_70', 'sm_80', etc. None means using the architecture of the first - CUDA GPU on the current machine. Default None. + The CUDA architecture, e.g., 'sm_35', 'sm_70', 'sm_80', etc. "auto" means + using the architecture of the first CUDA GPU on the current machine. Default "auto". """ OptionContext.current().set_option('cuda.arch', arch) @@ -685,7 +770,7 @@ def get_arch() -> str: The CUDA architecture, e.g., 'sm_35', 'sm_70', 'sm_80', etc. 
""" arch: Optional[str] = OptionContext.current().get_option('cuda.arch') - if arch is None: + if arch == "auto": import hidet.cuda # get the architecture of the first CUDA GPU diff --git a/requirements.txt b/requirements.txt index 17a8871ac..bb21b1f04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,5 +37,8 @@ filelock requests +# for configuration +tomlkit + # for parser -lark \ No newline at end of file +lark diff --git a/setup.py b/setup.py index 4252457b6..dda947129 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,8 @@ "requests", "filelock", "cuda-python>=11.6.1; platform_system=='Linux'", - "lark" + "lark", + "tomlkit" ], platforms=["linux"], entry_points={