Fixes #1601 Add initial support for Unizin Synthetic data #1600

Draft: wants to merge 8 commits into master
2 changes: 1 addition & 1 deletion config/cron_udp.hjson
@@ -207,7 +207,7 @@
(
select
distinct cse.person_id as user_id
from context_store_entity.course_section_enrollment cse
from context_store_entity.course_section_enrollment cse
left join context_store_entity.course_section cs
on cse.course_section_id = cs.course_section_id
left join context_store_keymap.course_offering co
11 changes: 9 additions & 2 deletions config/env_sample.hjson
@@ -90,7 +90,8 @@
"ROOT_PASSWORD": "student_dashboard_root_pw"
},
# Default Canvas Data id increment for course id, user id, etc
jxiao21 marked this conversation as resolved.
"CANVAS_DATA_ID_INCREMENT": 17700000000000000,
# for Unizin synthetic data, the value is 1000000000000
"CANVAS_DATA_ID_INCREMENT": 1000000000000,
# Canvas Configuration
"CANVAS_USER": "",
# strings for construct file download url
@@ -304,5 +305,11 @@
"COURSES_ENABLED": false,

# Path to the hjson file that contains cron queries
"CRON_QUERY_FILE": "config/cron_udp.hjson"
"CRON_QUERY_FILE": "config/cron_udp.hjson",

# Change the default Bigquery Project ID
"DEFAULT_PROJECT_ID": "udp-umich-prod",
Review comment (Member):
I would use a placeholder value here instead of "udp-umich-prod", like:

"DEFAULT_PROJECT_ID": "<UDP_institution_id>",

Review comment (Member):

I think this was just empty before. This should all work with the service account and the regular data if it's not set and I don't think it will work yet.

# Change the dataset project ID where queries are run against
"DATASET_PROJECT_ID": "unizin-shared"
Review comment (Member):

I will leave this line commented out by default, so a developer needs to enable DATASET_PROJECT_ID on purpose to connect to the Unizin synthetic data.

Review comment (Member):

Yeah, I don't think it would be needed otherwise. Though there could be a case where a default project is used but the datasets are in another project, like when/if we get true shared repositories.

Anyway, this is a temporary solution, and I believe we'll need a future issue to set this value on a per-course level in the admin.


}
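Putting the review suggestions together, the BigQuery entries in env_sample.hjson might end up looking like the sketch below. This is an illustration of the reviewers' suggestions (placeholder project ID, DATASET_PROJECT_ID commented out by default), not the state of the PR as submitted:

```hjson
# Change the default BigQuery project ID (this is typically the project
# that quotas are run against and logged into)
"DEFAULT_PROJECT_ID": "<UDP_institution_id>",

# Uncomment on purpose to run queries against the Unizin synthetic data
# "DATASET_PROJECT_ID": "unizin-shared"
```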
39 changes: 23 additions & 16 deletions dashboard/cron.py
@@ -52,7 +52,7 @@ def setup_queries(self):

def setup_bigquery(self):
# Instantiates a client
self.bigquery_client = bigquery.Client()
self.bigquery_client = bigquery.Client(project=settings.DEFAULT_PROJECT_ID)

# BQ Total Bytes Billed to report to status
self.total_bytes_billed = 0
@@ -99,24 +99,24 @@ def execute_bq_query(self, query: str, bq_job_config: Optional[bigquery.QueryJob
# Remove the newlines from the query
query = query.replace("\n", " ")

if bq_job_config:
try:
# Convert to bq schema object
query_job = self.bigquery_client.query(query, job_config=bq_job_config)
query_job_result = query_job.result()
# Create a new QueryJobConfig if none is provided
if bq_job_config is None:
bq_job_config = bigquery.QueryJobConfig()

self.total_bytes_billed += query_job.total_bytes_billed
logger.debug(f"This job had {query_job.total_bytes_billed} bytes. Total: {self.total_bytes_billed}")
return query_job_result
except Exception as e:
logger.error(f"Error ({str(e)}) in setting up schema for query {query}.")
raise Exception(e)
else:
query_job = self.bigquery_client.query(query)
# Add the dataset_project_id connection property to the job config
bq_job_config.connection_properties = [bigquery.ConnectionProperty("dataset_project_id", settings.DATASET_PROJECT_ID)]

try:
# Convert to bq schema object
query_job = self.bigquery_client.query(query, job_config=bq_job_config)
query_job_result = query_job.result()
self.total_bytes_billed += query_job.total_bytes_billed
logger.debug(f"This job had {query_job.total_bytes_billed} bytes. Total: {self.total_bytes_billed}")
return query_job_result
except Exception as e:
logger.error(f"Error ({str(e)}) in setting up schema for query {query}.")
raise Exception(e)

return query_job_result

# Execute a query against the MyLA database
def execute_myla_query(self, query: str, params: Optional[Dict] = None) -> ResultProxy:
@@ -226,7 +226,7 @@ def update_unizin_metadata(self):

logger.debug(metadata_sql)

status += self.util_function(metadata_sql, 'unizin_metadata')
try:
status += self.util_function(metadata_sql, 'unizin_metadata')
except Exception as e:
logger.warning("Could not directly access metadata; this is likely just an issue when using synthetic data.")

return status

@@ -313,6 +316,7 @@ def update_resource_access(self):
'canvas_event_urls', 'STRING', settings.CANVAS_EVENT_URLS))
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
job_config.connection_properties = [bigquery.ConnectionProperty("dataset_project_id", settings.DATASET_PROJECT_ID)]

# Location must match that of the dataset(s) referenced in the query.
bq_job = self.bigquery_client.query(final_query, location='US', job_config=job_config)
@@ -651,8 +655,10 @@ def do(self) -> str:

# continue cron tasks

logger.info("** term")
status += self.update_term()

exception_in_run = False
if len(self.valid_locked_course_ids) == 0:
logger.info("Skipping course-related table updates...")
status += "Skipped course-related table updates.\n"
@@ -676,6 +682,7 @@ def do(self) -> str:
status += str(e)
exception_in_run = True

logger.info("** informational")
status += self.update_unizin_metadata()

all_str_course_ids = set(
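The guarded call added in update_unizin_metadata follows a simple pattern: run the update, but treat a failure as non-fatal, since the metadata table may not be reachable when pointing at synthetic data. A minimal stand-alone sketch of that pattern (safe_update and run_query are hypothetical names for illustration, not code from the PR):

```python
import logging

logger = logging.getLogger(__name__)

def safe_update(status: str, run_query, table_name: str) -> str:
    """Append the result of run_query to status, tolerating failures.

    Mirrors the try/except added around util_function in the PR:
    a failure here is expected, and harmless, with synthetic data.
    """
    try:
        status += run_query(table_name)
    except Exception:
        # Hypothetical wording, matching the PR's intent: log and keep going.
        logger.warning(
            "Could not directly access %s; likely harmless with synthetic data.",
            table_name,
        )
    return status
```

With this shape, a cron run keeps going and simply logs a warning when the metadata query fails, instead of aborting the whole job.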
6 changes: 6 additions & 0 deletions dashboard/settings.py
@@ -449,6 +449,12 @@ def apply_env_overrides(env: Dict[str, Any], environ: os._Environ) -> Dict[str,
# Only need view permission for exports
IMPORT_EXPORT_EXPORT_PERMISSION_CODE = 'view'

# Change the default project ID for BigQuery if needed (This is typically the one that quotas are run against and logged into)
DEFAULT_PROJECT_ID = ENV.get("DEFAULT_PROJECT_ID", None)

# Override the default project ID for BigQuery if needed, like to unizin-shared
DATASET_PROJECT_ID = ENV.get("DATASET_PROJECT_ID", None)
Review comment (Member):

Should the default value be DEFAULT_PROJECT_ID, instead of None?

Review comment (Member):

Maybe, but then we might need a new variable to indicate whether or not this is running on synthetic data.

Maybe it's better to make that explicit. I'm not sure yet; this still has some more work on it.
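The reviewer's suggestion of defaulting DATASET_PROJECT_ID to DEFAULT_PROJECT_ID could be sketched like this (resolve_project_ids is a hypothetical helper, not code from the PR; env stands in for the parsed hjson settings dict):

```python
from typing import Optional, Tuple

def resolve_project_ids(env: dict) -> Tuple[Optional[str], Optional[str]]:
    # Project the BigQuery client runs as (quotas and logging).
    default_project_id = env.get("DEFAULT_PROJECT_ID")
    # Per the review suggestion: fall back to the default project
    # instead of None when no dataset project is configured.
    dataset_project_id = env.get("DATASET_PROJECT_ID") or default_project_id
    return default_project_id, dataset_project_id
```

The open question in the thread is whether this implicit fallback hides the distinction between regular and synthetic data; an explicit flag may turn out to be clearer.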


# IMPORT LOCAL ENV
# =====================
try: