diff --git a/silnlp/nmt/config.py b/silnlp/nmt/config.py
index 910b9345..1d892ef0 100644
--- a/silnlp/nmt/config.py
+++ b/silnlp/nmt/config.py
@@ -493,7 +493,8 @@ def preprocess(self, stats: bool, force_align: bool = False) -> None:
                 LOGGER.error(f"The source file {str(file)} does not exist.")
                 return
 
-        self._build_vocabs(stats)
+        if self.data["tokenize"]:
+            self._build_vocabs(stats)
         tokenizer = self.create_tokenizer()
         self._build_corpora(tokenizer, stats, force_align)
         LOGGER.info("Preprocessing completed")
@@ -557,7 +558,7 @@ def _build_corpora(self, tokenizer: Tokenizer, stats: bool, force_align: bool) -
         dict_count = self._write_dictionary(tokenizer, src_terms_files, trg_terms_files)
         LOGGER.info(f"dictionary size: {dict_count}")
 
-        if stats:
+        if stats and self.data["tokenize"]:
             self._calculate_tokenization_stats()
 
         return train_count
diff --git a/silnlp/nmt/hugging_face_config.py b/silnlp/nmt/hugging_face_config.py
index 6e53f8bf..1d8b9be0 100644
--- a/silnlp/nmt/hugging_face_config.py
+++ b/silnlp/nmt/hugging_face_config.py
@@ -540,7 +540,7 @@ def _build_vocabs(self, stats: bool = False) -> None:
             ["Target", 0],
         ]
 
-        if stats:
+        if stats and self.data["tokenize"]:
             stats_columns = pd.MultiIndex.from_tuples(
                 [
                     (" ", "Translation Side"),