Features/gjp miner #314

Merged · 26 commits · Jan 27, 2025
Commits
e09265b
Merge branch 'staging' into features/gjp-miner
schampoux Jan 13, 2025
d0bd72e
Merge branch 'staging' into features/gjp-miner
schampoux Jan 14, 2025
bc4d925
Merge branch 'features/gjp-miner' of https://github.com/macrocosm-os/…
Jan 14, 2025
8f63d9d
Merge branch 'staging' into features/gjp-miner
schampoux Jan 14, 2025
d158a4f
Merge branch 'features/gjp-miner' of https://github.com/macrocosm-os/…
Jan 14, 2025
e6baa54
add miner utils class
Jan 14, 2025
6350034
add self, rename to check_table
Jan 14, 2025
0afafd4
remove miner utils, moving everything to the folding miner
Jan 14, 2025
9335768
WIP SqliteUtils class
Jan 14, 2025
68fc866
adds check_sqlite_table to folding_miner.py for future use
Jan 16, 2025
465e089
Merge branch 'staging' into features/gjp-miner
schampoux Jan 16, 2025
6f02ff3
add function for checking sqlite table with respective test functions
Jan 17, 2025
1a92ec8
Merge remote-tracking branch 'origin/main' into features/gjp-miner
Jan 17, 2025
801975d
permission change to get rqlite installation to work
Jan 17, 2025
630638f
update readme for start_read_node.sh and folding miner functions
Jan 17, 2025
887ea17
update gitignore to include db and local-gjp
Jan 17, 2025
2af2c7c
remove functions from folding miner, move them to scripts/query_rqlit…
Jan 17, 2025
f2f287c
comments
Jan 17, 2025
6c6e299
add readme info for miner
Jan 17, 2025
6e2b30e
move start_read_node.sh into /scripts
Jan 17, 2025
dc613c7
update functions
Jan 17, 2025
5527404
remove valid url function
Jan 17, 2025
0cf7308
add docstrings and restructure functions
Jan 27, 2025
31b5b29
add env variables for start_read_node.sh
Jan 27, 2025
f5c41e2
delete snapshot
Jan 27, 2025
91ce9ad
update readme instructions for miner to start read only node.
Jan 27, 2025
7 changes: 7 additions & 0 deletions .env.example
@@ -2,3 +2,10 @@ S3_REGION = "nyc3"
S3_ENDPOINT = "https://nyc3.digitaloceanspaces.com"
S3_KEY = "s3_key"
S3_SECRET = "secret_key"
RQLITE_HTTP_ADDR=0.0.0.0:4001
RQLITE_RAFT_ADDR=0.0.0.0:4002
RQLITE_HTTP_ADV_ADDR=123.456.7.8:4001
RQLITE_RAFT_ADV_ADDR=123.456.7.8:4002
RQLITE_DATA_DIR=db/
JOIN_ADDR=174.138.3.61:4002
HOTKEY=your_hotkey
3 changes: 3 additions & 0 deletions .gitignore
@@ -189,3 +189,6 @@ folding/db
/charmm36-jul2022.ff

tests/mock_data

db
local-gjp
1 change: 1 addition & 0 deletions README.md
@@ -190,6 +190,7 @@ pm2 start pm2_configs/validator.config.js
```
Keep in mind that you will need to change the default parameters for either the [miner](./scripts/run_miner.sh) or the [validator](./scripts/run_validator.sh).

Miners now have the opportunity to interact with the global job pool (GJP) locally. By creating a read-only node via `start_read_node.sh`, miners sync the GJP to their local machine in the `db` directory. We have provided a script, `scripts/query_rqlite.py`, that returns jobs ordered by their priority in the GJP, or a specific job identified by `pdb_id`. With this information, miners can experiment with customizing their job queue. The script is also helpful for downloading and analyzing checkpoint files from other miners. Please see the updated environment variables in `.env.example` and set your public IP address in the following fields: `RQLITE_HTTP_ADV_ADDR`, `RQLITE_RAFT_ADV_ADDR`.
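
A rough sketch of that workflow, assuming the repository root as the working directory (exact paths and values may differ in your setup):

```bash
# Copy the example environment file, then set the advertised addresses to your public IP
cp .env.example .env

# Start a read-only rqlite node; the GJP syncs into the local db/ directory
bash scripts/start_read_node.sh

# In another shell: query the highest-priority jobs and download their checkpoint files
python scripts/query_rqlite.py
```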
## How does the Subnet Work?

In this subnet, validators create protein folding challenges for miners, who in turn run simulations using OpenMM to obtain stable protein configurations. At a high level, each role can be broken down into parts:
4 changes: 3 additions & 1 deletion folding/miners/folding_miner.py
@@ -9,6 +9,7 @@
from typing import Dict, List, Tuple
import copy
import traceback
import asyncio

import bittensor as bt
import openmm as mm
@@ -18,7 +19,6 @@
from folding.base.miner import BaseMinerNeuron
from folding.base.simulation import OpenMMSimulation
from folding.protocol import JobSubmissionSynapse
from folding.utils.logging import log_event
from folding.utils.reporters import ExitFileReporter, LastTwoCheckpointsReporter
from folding.utils.ops import (
check_if_directory_exists,
@@ -146,6 +146,7 @@ def __init__(self, config=None, base_data_path: str = None):

self.mock = None
self.generate_random_seed = lambda: random.randint(0, 1000)
self.db_path = "/db/db.sqlite"

        # hardcoded for now -- TODO: make this more flexible
self.STATES = ["nvt", "npt", "md_0_1"]
@@ -362,6 +363,7 @@ def forward(self, synapse: JobSubmissionSynapse) -> JobSubmissionSynapse:
elif len(synapse.md_inputs) == 0: # The vali sends nothing to the miner
return check_synapse(self=self, synapse=synapse, event=event)


def submit_simulation(
self,
synapse: JobSubmissionSynapse,
1 change: 1 addition & 0 deletions install.sh
@@ -41,4 +41,5 @@ poetry install
sudo apt-get update
sudo apt-get install build-essential cmake libfftw3-dev vim npm -y
sudo npm install -g pm2 -y
chmod +x install_rqlite.sh
./install_rqlite.sh
133 changes: 133 additions & 0 deletions scripts/query_rqlite.py
@@ -0,0 +1,133 @@
import sqlite3
import requests
import os
import json
from typing import Dict, List, Optional
from folding.utils.logger import logger


def fetch_job_details(db_path: str, max_workers: int, columns: List[str], pdb_id: Optional[str] = None) -> Dict:
"""
Fetches job records from GJP database based on priority and specified fields.
Optionally filters by a specific pdb_id if provided.

Parameters:
db_path (str): The file path to the SQLite database.
max_workers (int): The maximum number of job records to fetch, sorted by priority in descending order.
columns (List[str]): The list of columns to fetch from the database.
pdb_id (Optional[str]): Specific pdb_id to filter the jobs by. If None, fetches jobs without filtering.

Returns:
Dict: A dictionary mapping each job 'id' to its details as specified in the columns list.
"""
logger.info("Fetching job details from the database")
columns_to_select = ', '.join(columns)
with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()
if pdb_id:
query = f"SELECT id, {columns_to_select} FROM jobs WHERE pdb_id = ? ORDER BY priority DESC LIMIT 1"
cursor.execute(query, (pdb_id,))
else:
query = f"SELECT id, {columns_to_select} FROM jobs ORDER BY priority DESC LIMIT ?"
cursor.execute(query, (max_workers,))

selected_columns = ['id'] + [desc[0] for desc in cursor.description[1:]]
jobs = cursor.fetchall()
if not jobs:
logger.info("No jobs found.")
return {}

jobs_dict = {}
for job in jobs:
job_details = dict(zip(selected_columns, job))
job_id = job_details.pop('id')
jobs_dict[job_id] = job_details
return jobs_dict

def download_files(job_details: Dict, output_dir: str = "./local-gjp"):
"""
Downloads files based on links contained in the job details dictionary. The function handles
two types of links: `s3_links` which are expected to be dictionaries containing file keys and URLs,
and `best_cpt_links` which are expected to be lists of URLs.

Parameters:
job_details (Dict): A dictionary where each key is a job_id and each value is another dictionary
containing job details including 'pdb_id', 's3_links', and 'best_cpt_links'.
output_dir (str): The root directory where downloaded files will be organized and stored. Each set of files
corresponding to a job will be placed in a subdirectory named after its `pdb_id`.
Note:
This function expects the `s3_links` to be a JSON string that can be decoded into a dictionary and `best_cpt_links`
to be a JSON string that can be decoded into a list. Error handling is implemented for JSON decoding issues.
"""
for job_id, details in job_details.items():
pdb_id = details.get("pdb_id")
if not pdb_id:
logger.error(f"Missing pdb_id for job_id {job_id}")
continue

dir_path = os.path.join(output_dir, pdb_id)
os.makedirs(dir_path, exist_ok=True)

# Handle s3_links as dict
s3_links_str = details.get('s3_links')
if s3_links_str:
try:
s3_links = json.loads(s3_links_str)
if isinstance(s3_links, dict):
for key, url in s3_links.items():
download_file(pdb_id, key, url, dir_path)
except json.JSONDecodeError:
logger.error(f"Error decoding JSON for s3_links for pdb_id {pdb_id}: {s3_links_str}")

# Handle best_cpt_links as a list
best_cpt_links = details.get('best_cpt_links')
if best_cpt_links:
try:
best_cpt_links = json.loads(best_cpt_links)
if isinstance(best_cpt_links, list):
for url in best_cpt_links:
key = url.split('/')[-1]
download_file(pdb_id, key, url, dir_path)
except json.JSONDecodeError:
logger.error(f"Error decoding JSON for best_cpt_links for pdb_id {pdb_id}: {best_cpt_links}")

def download_file(pdb_id, key, url, dir_path):
"""
Handles the downloading of a single file from a specified URL into a specified directory path. This function
is called by 'download_files' to manage individual file downloads.

Parameters:
pdb_id (str): The PDB ID associated with the job, used for logging purposes.
key (str): A key or filename identifier for the file being downloaded.
url (str): The URL from which the file will be downloaded.
dir_path (str): The directory path where the file will be saved. This path should already exist.

Behavior:
- Attempts to download the file from the provided URL.
- If successful, saves the file to the specified directory with a filename based on the 'key' and 'pdb_id'.
- Logs the outcome of the download attempt, noting successful downloads and detailing errors for failed attempts.

Note:
- The function assumes HTTP(S) URLs and will handle HTTP errors. It does not perform retries and will
raise an exception if the download fails.
"""

file_name = f"{key}-{pdb_id}{os.path.splitext(url)[1]}"
file_path = os.path.join(dir_path, file_name)
logger.info(f"Attempting to download from {url} to {file_path}")
try:
response = requests.get(url)
response.raise_for_status()
with open(file_path, 'wb') as f:
f.write(response.content)
logger.info(f"Successfully downloaded {key} file for {pdb_id} to {file_path}")
except requests.exceptions.RequestException as e:
logger("ERROR", f"Failed to download file from {url}: {e}")

if __name__ == "__main__":
db_path = "db/db.sqlite"
max_workers = 2
columns = ['job_id', 'pdb_id', 'best_cpt_links']
job_details = fetch_job_details(db_path, max_workers, columns, pdb_id='1zeg')
logger.info(f"Job details fetched: {job_details}")
download_files(job_details)
8 changes: 5 additions & 3 deletions start_read_node.sh → scripts/start_read_node.sh
@@ -13,9 +13,9 @@ PUBLIC_IP=$(curl -s ifconfig.me)
# Default values if not set in .env
RQLITE_HTTP_ADDR=${RQLITE_HTTP_ADDR:-0.0.0.0:4001}
RQLITE_RAFT_ADDR=${RQLITE_RAFT_ADDR:-0.0.0.0:4002}
RQLITE_HTTP_ADV_ADDR=${RQLITE_HTTP_ADV_ADDR:-PUBLIC_IP:4001}
RQLITE_RAFT_ADV_ADDR=${RQLITE_RAFT_ADV_ADDR:-PUBLIC_IP:4002}
RQLITE_DATA_DIR=${RQLITE_DATA_DIR:-db/}
RQLITE_HTTP_ADV_ADDR=${RQLITE_HTTP_ADV_ADDR:-$PUBLIC_IP:4001}
RQLITE_RAFT_ADV_ADDR=${RQLITE_RAFT_ADV_ADDR:-$PUBLIC_IP:4002}
RQLITE_DATA_DIR=${RQLITE_DATA_DIR:-$(pwd)/db/}
JOIN_ADDR=${JOIN_ADDR:-}
HOTKEY=${HOTKEY:-}

@@ -35,6 +35,8 @@ cleanup() {
# Set trap to catch SIGINT (Ctrl+C) and cleanup
trap cleanup SIGINT SIGTERM

echo "Using RQLITE_DATA_DIR: $RQLITE_DATA_DIR"

# Start RQLite
rqlited -node-id ${HOTKEY} \
-http-addr ${RQLITE_HTTP_ADDR} \
43 changes: 43 additions & 0 deletions tests/test_miner_functions.py
@@ -0,0 +1,43 @@
import pytest
from folding.miners.folding_miner import check_sqlite_table
import unittest
from unittest.mock import patch, MagicMock
import sqlite3

class TestCheckSqliteTable(unittest.TestCase):
def setUp(self):
self.sample_data = [
(1, 'job1', 'pdb1', '2025-01-01 00:00:00', 10, True, '{"pdb": "link_to_pdb"}', '{"ff": "file.xml"}'),
(2, 'job2', 'pdb2', '2025-01-02 00:00:00', 5, False, '{"pdb": "link_to_pdb2"}', '{"ff": "file2.xml"}')
]
self.columns = ['id', 'job_id', 'pdb_id', 'created_at', 'priority', 'is_organic', 's3_links', 'system_config']

@patch('folding.utils.logger.logger')
@patch('sqlite3.connect')
def test_check_sqlite_table_success(self, mock_connect, mock_logger):
mock_cursor = MagicMock()
mock_cursor.fetchall.return_value = self.sample_data
mock_cursor.description = [(column,) for column in self.columns]
mock_connect.return_value.__enter__.return_value.cursor.return_value = mock_cursor

result = check_sqlite_table(db_path='test_path.db', max_workers=2)
print(len(result))
# check if result is correctly formatted
self.assertIsInstance(result, dict)
self.assertEqual(len(result), 2)
self.assertTrue(all(isinstance(result[key], dict) for key in result))

# check if all expected keys are in the results
expected_keys = ['job_id', 'pdb_id', 'created_at', 'priority', 'is_organic', 's3_links', 'system_config']
for job_details in result.values():
self.assertTrue(all(key in job_details for key in expected_keys))

@patch('folding.utils.logger.logger')
@patch('sqlite3.connect')
def test_check_sqlite_table_error(self, mock_connect, mock_logger):
mock_connect.side_effect = sqlite3.Error("Fake SQL Error")

result = check_sqlite_table(db_path='test_path.db', max_workers=2)

# check if result is None
self.assertIsNone(result)