reenable sqlserver support from defog-python

defog-ai · Jun 25, 2024 · edf5eda · edf5eda
1 parent 563aedc
commit edf5eda
Show file tree

Hide file tree

Showing 3 changed files with 96 additions and 0 deletions.
diff --git a/defog/__init__.py b/defog/__init__.py
@@ -16,6 +16,7 @@
     "bigquery",
     "snowflake",
     "databricks",
+    "sqlserver",
 ]
 
 

diff --git a/defog/generate_schema.py b/defog/generate_schema.py
@@ -485,6 +485,81 @@ def generate_bigquery_schema(
         return schemas
 
 
+def generate_sqlserver_schema(
+    self,
+    tables: list,
+    upload: bool = True,
+    return_format: str = "csv",
+    return_tables_only: bool = False,
+) -> str:
+    try:
+        import pyodbc
+    except:
+        raise Exception("pyodbc not installed.")
+
+    conn = pyodbc.connect(self.db_creds["connection_string"])
+    cur = conn.cursor()
+    schemas = {}
+    schema = self.db_creds.get("schema", "dbo")
+
+    if len(tables) == 0:
+        # get all tables
+        cur.execute(
+            "SELECT table_name FROM information_schema.tables WHERE table_schema = %s;",
+            (schema,),
+        )
+        if schema == "dbo":
+            tables += [row[0] for row in cur.fetchall()]
+        else:
+            tables += [schema + "." + row[0] for row in cur.fetchall()]
+
+    if return_tables_only:
+        return tables
+
+    print("Getting schema for each table in your database...")
+    # get the schema for each table
+    for table_name in tables:
+        cur.execute(
+            f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name}';"
+        )
+        rows = cur.fetchall()
+        rows = [row for row in rows]
+        rows = [{"column_name": i[0], "data_type": i[1]} for i in rows]
+        schemas[table_name] = rows
+
+    conn.close()
+    if upload:
+        print(
+            "Sending the schema to Defog servers and generating column descriptions. This might take up to 2 minutes..."
+        )
+        r = requests.post(
+            f"{self.base_url}/get_schema_csv",
+            json={
+                "api_key": self.api_key,
+                "schemas": schemas,
+                "foreign_keys": [],
+                "indexes": [],
+            },
+        )
+        resp = r.json()
+        if "csv" in resp:
+            csv = resp["csv"]
+            if return_format == "csv":
+                pd.read_csv(StringIO(csv)).to_csv("defog_metadata.csv", index=False)
+                return "defog_metadata.csv"
+            else:
+                return csv
+        else:
+            print(f"We got an error!")
+            if "message" in resp:
+                print(f"Error message: {resp['message']}")
+            print(
+                f"Please feel free to open a github issue at https://github.com/defog-ai/defog-python if this a generic library issue, or email [email protected]."
+            )
+    else:
+        return schemas
+
+
 def generate_db_schema(
     self,
     tables: list,
@@ -541,6 +616,13 @@ def generate_db_schema(
             upload=upload,
             return_tables_only=return_tables_only,
         )
+    elif self.db_type == "sqlserver":
+        return self.generate_sqlserver_schema(
+            tables,
+            return_format=return_format,
+            upload=upload,
+            return_tables_only=return_tables_only,
+        )
     else:
         raise ValueError(
             f"Creation of a DB schema for {self.db_type} is not yet supported via the library. If you are a premium user, please contact us at [email protected] so we can manually add it."

diff --git a/defog/query.py b/defog/query.py
@@ -113,6 +113,19 @@ def execute_query_once(db_type: str, db_creds, query: str):
             colnames = [desc[0] for desc in cursor.description]
             results = cursor.fetchall()
         return colnames, results
+    elif db_type == "sqlserver":
+        try:
+            import pyodbc
+        except:
+            raise Exception("pyodbc not installed.")
+        conn = pyodbc.connect(**db_creds)
+        cur = conn.cursor()
+        cur.execute(query)
+        colnames = [desc[0] for desc in cur.description]
+        results = cur.fetchall()
+        cur.close()
+        conn.close()
+        return colnames, results
     else:
         raise Exception(f"Database type {db_type} not yet supported.")