Skip to content

Commit

Permalink
Reverted to Spark
Browse files Browse the repository at this point in the history
  • Loading branch information
m-kovalsky committed Oct 2, 2024
1 parent 852b3e1 commit e0d5812
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 16 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Semantic Link Labs

[![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs)
[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.8.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.8.2&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs)

Expand Down Expand Up @@ -77,6 +77,7 @@ An even better way to ensure the semantic-link-labs library is available in your
2. Select your newly created environment within the 'Environment' drop down in the navigation bar at the top of the notebook

## Version History
* [0.8.2](https://github.com/microsoft/semantic-link-labs/releases/tag/0.8.2) (October 2, 2024)
* [0.8.1](https://github.com/microsoft/semantic-link-labs/releases/tag/0.8.1) (October 2, 2024)
* [0.8.0](https://github.com/microsoft/semantic-link-labs/releases/tag/0.8.0) (September 25, 2024)
* [0.7.4](https://github.com/microsoft/semantic-link-labs/releases/tag/0.7.4) (September 16, 2024)
Expand Down
3 changes: 1 addition & 2 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@ IPython
polib
powerbiclient
azure.mgmt.resource
jsonpath_ng
deltalake
jsonpath_ng
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
project = 'semantic-link-labs'
copyright = '2024, Microsoft and community'
author = 'Microsoft and community'
release = '0.8.1'
release = '0.8.2'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name="semantic-link-labs"
authors = [
{ name = "Microsoft Corporation" },
]
version="0.8.1"
version="0.8.2"
description="Semantic Link Labs for Microsoft Fabric"
readme="README.md"
requires-python=">=3.10,<3.12"
Expand All @@ -29,7 +29,6 @@ dependencies = [
"polib",
"azure.mgmt.resource",
"jsonpath_ng",
"deltalake",
]

[tool.setuptools.packages.find]
Expand All @@ -47,7 +46,7 @@ test = [
Repository = "https://github.com/microsoft/semantic-link-labs.git"

[[tool.mypy.overrides]]
module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*,deltalake.*"
module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*"
ignore_missing_imports = true

[tool.flake8]
Expand Down
13 changes: 7 additions & 6 deletions src/sempy_labs/_helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,12 +1132,13 @@ def generate_guid():
return str(uuid.uuid4())


def get_max_run_id(lakehouse: str, table_name: str) -> int:
    """Return the highest RunId recorded in a lakehouse delta table.

    Parameters
    ----------
    lakehouse : str
        Name of the lakehouse (database) containing the table.
    table_name : str
        Name of the delta table; assumed to have a 'RunId' column.

    Returns
    -------
    int
        The maximum RunId found, or 0 when the table has no rows, so
        that callers computing ``max_run_id + 1`` never hit a ``None``.
    """

    # Imported lazily so the module loads in environments without Spark.
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    # NOTE(review): names are interpolated directly into SQL; assumed to
    # come from trusted internal configuration, not user input — confirm.
    query = f"SELECT MAX(RunId) FROM {lakehouse}.{table_name}"
    dfSpark = spark.sql(query)
    max_run_id = dfSpark.collect()[0][0]

    # MAX() over an empty table yields NULL (None); normalize to 0 so the
    # declared ``int`` return type actually holds.
    return 0 if max_run_id is None else max_run_id
4 changes: 3 additions & 1 deletion src/sempy_labs/_model_bpa.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,9 @@ def translate_using_spark(rule_file):
if len(lakeT_filt) == 0:
runId = 1
else:
max_run_id = get_max_run_id(table_name=delta_table_name)
max_run_id = get_max_run_id(
lakehouse=lakehouse, table_name=delta_table_name
)
runId = max_run_id + 1

now = datetime.datetime.now()
Expand Down
2 changes: 1 addition & 1 deletion src/sempy_labs/_model_bpa_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def run_model_bpa_bulk(
if len(lakeT_filt) == 0:
runId = 1
else:
max_run_id = get_max_run_id(table_name=output_table)
max_run_id = get_max_run_id(lakehouse=lakehouse, table_name=output_table)
runId = max_run_id + 1

if isinstance(workspace, str):
Expand Down
2 changes: 1 addition & 1 deletion src/sempy_labs/_vertipaq.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def vertipaq_analyzer(
if len(lakeT_filt) == 0:
runId = 1
else:
max_run_id = get_max_run_id(table_name=lakeTName)
max_run_id = get_max_run_id(lakehouse=lakehouse, table_name=lakeTName)
runId = max_run_id + 1

dfMap = {
Expand Down

0 comments on commit e0d5812

Please sign in to comment.