diff --git a/examples/sagemaker-pipelines-graphbolt/README.md b/examples/sagemaker-pipelines-graphbolt/README.md
index 44499a3dd6..b98f912645 100644
--- a/examples/sagemaker-pipelines-graphbolt/README.md
+++ b/examples/sagemaker-pipelines-graphbolt/README.md
@@ -90,7 +90,7 @@ aws ec2 run-instances \
     --instance-type "m6in.4xlarge" \
     --key-name my-key-name \
     --block-device-mappings '[{
-        "DeviceName": "/dev/sdf",
+        "DeviceName": "/dev/sda1",
         "Ebs": {
             "VolumeSize": 300,
             "VolumeType": "gp3",
@@ -108,15 +108,14 @@ Once logged in, you can set up your Python environment to run GraphStorm
 ```bash
 conda init
 eval $SHELL
-conda create -y --name gsf python=3.10
-conda activate gsf
+# Available on the DLAMI, otherwise create a new conda env
+conda activate pytorch
 
 # Install dependencies
-pip install sagemaker boto3 ogb pyarrow
+pip install sagemaker[local] boto3 ogb pyarrow
 
 # Clone the GraphStorm repository to access the example code
 git clone https://github.com/awslabs/graphstorm.git ~/graphstorm
-cd ~/graphstorm/examples/sagemaker-pipelines-graphbolt
 ```
 
 ### Download and prepare datasets
@@ -136,12 +135,11 @@ You'lll download the smaller-scale [ogbn-arxiv](https://ogb.stanford.edu/docs/no
 BUCKET_NAME=
 ```
-
 You will use this script to directly download, transform and upload the data to S3:
-
 ```bash
-python convert_ogb_arxiv_to_gconstruct.py \
+cd ~/graphstorm/examples/sagemaker-pipelines-graphbolt
+python convert_arxiv_to_gconstruct.py \
     --output-prefix s3://$BUCKET_NAME/ogb-arxiv-input
 ```
@@ -188,12 +186,14 @@ bash build_and_push_papers100M_image.sh
 # $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/papers100m-processor
 
 # Run a SageMaker job to do the processing and upload the output to S3
-SAGEMAKER_EXECUTION_ROLE=
+SAGEMAKER_EXECUTION_ROLE_ARN=
 ACCOUNT_ID=
 REGION=us-east-1
-python sagemaker_convert_papers100M.py \
+
+aws configure set region $REGION
+python sagemaker_convert_papers100m.py \
     --output-bucket $BUCKET_NAME \
-    --execution-role-arn $SAGEMAKER_EXECUTION_ROLE \
+    --execution-role-arn $SAGEMAKER_EXECUTION_ROLE_ARN \
     --region $REGION \
     --instance-type ml.m5.4xlarge \
     --image-uri $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/papers100m-processor
@@ -201,6 +201,8 @@ python sagemaker_convert_papers100M.py \
 
 This will produce the processed data at `s3://$BUCKET_NAME/ogb-papers100M-input` which can then be used as input to GraphStorm.
 
+> NOTE: Ensure your instance IAM profile is allowed to perform `iam:GetRole` and `iam:GetPolicy` on your `SAGEMAKER_EXECUTION_ROLE_ARN`.
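+
+A quick partial check of those permissions from the instance (a sketch, assuming the AWS CLI is configured and `SAGEMAKER_EXECUTION_ROLE_ARN` is set in your shell):
+
+```bash
+# Extract the role name from the ARN and try to read the role.
+# An AccessDenied error here means the instance profile lacks iam:GetRole on it.
+ROLE_NAME="${SAGEMAKER_EXECUTION_ROLE_ARN##*/}"
+aws iam get-role --role-name "$ROLE_NAME"
+```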
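+
+Because the launcher submits the processing job with `wait=False`, it returns immediately and you can monitor progress separately. A minimal sketch using the AWS CLI, assuming the `papers100m-processing` base job name used by the launcher:
+
+```bash
+# List the most recent processing jobs created by the launcher
+aws sagemaker list-processing-jobs \
+    --name-contains papers100m-processing \
+    --sort-by CreationTime --sort-order Descending --max-results 5
+# Then inspect a specific job from the list:
+# aws sagemaker describe-processing-job --processing-job-name <job-name>
+```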
+
 #### [Optional] Prepare the ogbn-papers100M dataset locally
@@ -220,7 +222,8 @@ mkdir ~/papers100M-raw-data
 cd ~/papers100M-raw-data
 axel -n 16 http://snap.stanford.edu/ogb/data/nodeproppred/papers100M-bin.zip
-ripuznip unzip-file papers100M-bin.zip
-ripunzip unzip-file papers100M-bin/raw/data.npz && rm papers100M-bin/raw/data.npz
+ripunzip unzip-file papers100M-bin.zip
+cd papers100M-bin/raw
+ripunzip unzip-file data.npz && rm data.npz
 
 # Install process script dependencies
 python -m pip install \
@@ -232,6 +235,7 @@ python -m pip install \
 
 # Process and upload to S3, this will take around 20 minutes
+cd ~/graphstorm/examples/sagemaker-pipelines-graphbolt
 python convert_ogb_papers100m_to_gconstruct.py \
     --input-dir ~/papers100M-raw-data --output-dir s3://$BUCKET_NAME/ogb-papers100M-input
@@ -248,10 +252,6 @@ sudo apt update
-sudo apt install -y Docker.io
+sudo apt install -y docker.io
 docker -v
 
-# Enter you account ID here
-ACCOUNT_ID=
-REGION=us-east-1
-
 cd ~/graphstorm
 bash ./docker/build_graphstorm_image.sh --environment sagemaker --device cpu
@@ -259,9 +259,6 @@ bash ./docker/build_graphstorm_image.sh --environment sagemaker --device cpu
 bash docker/push_graphstorm_image.sh -e sagemaker -r $REGION -a $ACCOUNT_ID -d cpu
 # This will push an image to
 # ${ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/graphstorm:sagemaker-cpu
-
-# Install sagemaker with support for local mode
-pip install sagemaker[local]
 ```
 
 Next, you will create a SageMaker Pipeline to run the jobs that are necessary to train GNN models with GraphStorm.
@@ -276,10 +273,10 @@ In this section, you will create a [Sagemaker Pipeline](https://docs.aws.amazon.
 ```bash
 PIPELINE_NAME="ogbn-arxiv-gs-pipeline"
-BUCKET_NAME="my-s3-bucket"
+
-bash deploy_papers100M_pipeline.sh \
-    --account "" \
-    --bucket-name $BUCKET_NAME --role "" \
+bash deploy_arxiv_pipeline.sh \
+    --account $ACCOUNT_ID \
+    --bucket-name $BUCKET_NAME --role $SAGEMAKER_EXECUTION_ROLE_ARN \
     --pipeline-name $PIPELINE_NAME \
     --use-graphbolt false
 ```
@@ -303,8 +300,8 @@ The ogbn-arxiv data are small enough that you can execute the pipeline locally.
 
 ```bash
 PIPELINE_NAME="ogbn-arxiv-gs-pipeline"
-cd ~/graphstorm/sagemaker/pipeline
-python execute_sm_pipeline.py \
+
+python ~/graphstorm/sagemaker/pipeline/execute_sm_pipeline.py \
     --pipeline-name $PIPELINE_NAME \
     --region us-east-1 \
     --local-execution | tee arxiv-local-logs.txt
@@ -382,7 +379,7 @@ bash deploy_arxiv_pipeline.sh \
     --pipeline-name $PIPELINE_NAME \
     --use-graphbolt true
 # Execute the pipeline locally
-python execute_sm_pipeline.py \
+python ~/graphstorm/sagemaker/pipeline/execute_sm_pipeline.py \
     --pipeline-name $PIPELINE_NAME \
     --region us-east-1 \
     --local-execution | tee arxiv-local-gb-logs.txt
@@ -439,6 +436,7 @@ Now you are ready to deploy your initial pipeline for papers-100M
 ```bash
 PIPELINE_NAME="ogb-papers100M-pipeline"
+cd ~/graphstorm/examples/sagemaker-pipelines-graphbolt/
 bash deploy_papers100M_pipeline.sh \
     --account \
     --bucket-name --role \
@@ -449,7 +447,7 @@ bash deploy_papers100M_pipeline.sh \
     --use-graphbolt true
 ```
 
-Execute the pipeline and let it run the background.
+Execute the pipeline and let it run in the background.
 ```bash
-python execute_sm_pipeline.py \
+python ~/graphstorm/sagemaker/pipeline/execute_sm_pipeline.py \
     --pipeline-name $PIPELINE_NAME \
     --region us-east-1 --async-execution
@@ -469,7 +467,7 @@ bash deploy_papers100M_pipeline.sh \
     --use-graphbolt true
 
 # Execute the GraphBolt-enabled pipeline on SageMaker
-python execute_sm_pipeline.py \
+python ~/graphstorm/sagemaker/pipeline/execute_sm_pipeline.py \
     --pipeline-name $PIPELINE_NAME \
     --region us-east-1 \
     --async-execution
diff --git a/examples/sagemaker-pipelines-graphbolt/convert_arxiv_to_gconstruct.py b/examples/sagemaker-pipelines-graphbolt/convert_arxiv_to_gconstruct.py
index dd66f09f01..456c2c5bdf 100644
--- a/examples/sagemaker-pipelines-graphbolt/convert_arxiv_to_gconstruct.py
+++ b/examples/sagemaker-pipelines-graphbolt/convert_arxiv_to_gconstruct.py
@@ -100,7 +100,7 @@ def convert_ogbn_arxiv(output_prefix: str):
             "node_id_col": "nid",
             "node_type": "node",
             "format": {"name": "parquet"},
-            "files": [f"{output_prefix}/nodes/paper/nodes.parquet"],
+            "files": ["nodes/paper/nodes.parquet"],
             "features": [
                 {
                     "feature_col": "feat",
@@ -118,9 +118,9 @@ def convert_ogbn_arxiv(output_prefix: str):
                     "task_type": "classification",
                     "custom_split_filenames": {
                         "column": "nid",
-                        "train": f"{output_prefix}/splits/train_idx.parquet",
-                        "valid": f"{output_prefix}/splits/valid_idx.parquet",
-                        "test": f"{output_prefix}/splits/test_idx.parquet",
+                        "train": "splits/train_idx.parquet",
+                        "valid": "splits/valid_idx.parquet",
+                        "test": "splits/test_idx.parquet",
                     },
                     "label_stats_type": "frequency_cnt",
                 }
@@ -133,14 +133,14 @@ def convert_ogbn_arxiv(output_prefix: str):
             "dest_id_col": "dst",
             "relation": ["node", "cites", "node"],
             "format": {"name": "parquet"},
-            "files": [f"{output_prefix}/edges/paper-cites-paper/edges.parquet"],
+            "files": ["edges/paper-cites-paper/edges.parquet"],
         },
         {
             "source_id_col": "dst",
             "dest_id_col": "src",
             "relation": ["node", "cites-rev", "node"],
             "format": {"name": "parquet"},
-            "files": [f"{output_prefix}/edges/paper-cites-paper/edges.parquet"],
+            "files": ["edges/paper-cites-paper/edges.parquet"],
         },
     ],
 }
@@ -160,4 +160,4 @@ def convert_ogbn_arxiv(output_prefix: str):
 
 if __name__ == "__main__":
     args = parse_args()
-    convert_ogbn_arxiv(args.output_prefix)
+    convert_ogbn_arxiv(args.output_s3_prefix)
diff --git a/examples/sagemaker-pipelines-graphbolt/convert_ogb_papers100m_to_gconstruct.py b/examples/sagemaker-pipelines-graphbolt/convert_ogb_papers100m_to_gconstruct.py
index 361c35b89d..497e9d7902 100644
--- a/examples/sagemaker-pipelines-graphbolt/convert_ogb_papers100m_to_gconstruct.py
+++ b/examples/sagemaker-pipelines-graphbolt/convert_ogb_papers100m_to_gconstruct.py
@@ -15,6 +15,7 @@
 
 Convert papers100M data and prepare for input to GConstruct
 """
+
 import argparse
 import gzip
 import json
@@ -87,7 +88,7 @@ def process_data(input_dir, output_dir, filesystem):
     num_nodes, num_features = node_feat.shape
     num_edges = edge_index.shape[1]
     logging.info(
-        f"Node features shape: {node_feat.shape:,}, Number of edges: {num_edges:,}"
+        f"Node features shape: {node_feat.shape}, Number of edges: {num_edges:,}"
     )
 
     # Define schemas for nodes and edges
diff --git a/examples/sagemaker-pipelines-graphbolt/deploy_arxiv_pipeline.sh b/examples/sagemaker-pipelines-graphbolt/deploy_arxiv_pipeline.sh
index e43b4f4335..91f3a1830f 100644
--- a/examples/sagemaker-pipelines-graphbolt/deploy_arxiv_pipeline.sh
+++ b/examples/sagemaker-pipelines-graphbolt/deploy_arxiv_pipeline.sh
@@ -26,15 +26,15 @@ parse_params() {
   case "${1-}" in
   -h | --help) usage ;;
   -x | --verbose) set -x ;;
-  -r | --role)
-    ROLE="${2-}"
+  -r | --role | --execution-role)
+    ROLE_ARN="${2-}"
     shift
     ;;
   -a | --account)
     ACCOUNT="${2-}"
     shift
     ;;
-  -b | --bucket)
+  -b | --bucket-name)
     BUCKET_NAME="${2-}"
     shift
     ;;
@@ -54,8 +54,8 @@ parse_params() {
 
   # check required params and arguments
   [[ -z "${ACCOUNT-}" ]] && die "Missing required parameter: -a/--account "
-  [[ -z "${BUCKET-}" ]] && die "Missing required parameter: -b/--bucket "
-  [[ -z "${ROLE-}" ]] && die "Missing required parameter: -r/--role "
+  [[ -z "${BUCKET_NAME-}" ]] && die "Missing required parameter: -b/--bucket-name "
+  [[ -z "${ROLE_ARN-}" ]] && die "Missing required parameter: -r/--execution-role "
   [[ -z "${USE_GRAPHBOLT-}" ]] && die "Missing required parameter: -g/--use-graphbolt "
 
   return 0
@@ -102,6 +102,7 @@ fi
 
 python3 $SCRIPT_DIR/../../sagemaker/pipeline/create_sm_pipeline.py \
     --cpu-instance-type ${TRAIN_CPU_INSTANCE} \
+    --execution-role "${ROLE_ARN}" \
     --graph-construction-args "--num-processes 8" \
     --graph-construction-instance-type ${GCONSTRUCT_INSTANCE} \
     --graph-construction-config-filename ${GCONSTRUCT_CONFIG} \
@@ -119,7 +120,6 @@ python3 $SCRIPT_DIR/../../sagemaker/pipeline/create_sm_pipeline.py \
     --partition-output-json ${PARTITION_OUTPUT_JSON} \
     --partition-algorithm ${PARTITION_ALGORITHM} \
     --region ${REGION} \
-    --role "${ROLE}" \
     --train-on-cpu \
     --train-inference-task ${TASK_TYPE} \
     --train-yaml-s3 "${TRAIN_YAML_S3}" \
diff --git a/examples/sagemaker-pipelines-graphbolt/deploy_papers100M_pipeline.sh b/examples/sagemaker-pipelines-graphbolt/deploy_papers100M_pipeline.sh
index d85a2edd1c..4f94a03cab 100644
--- a/examples/sagemaker-pipelines-graphbolt/deploy_papers100M_pipeline.sh
+++ b/examples/sagemaker-pipelines-graphbolt/deploy_papers100M_pipeline.sh
@@ -27,15 +27,15 @@ parse_params() {
   case "${1-}" in
   -h | --help) usage ;;
   -x | --verbose) set -x ;;
-  -r | --role)
-    ROLE="${2-}"
+  -r | --role | --execution-role)
+    ROLE_ARN="${2-}"
     shift
     ;;
   -a | --account)
     ACCOUNT="${2-}"
     shift
     ;;
-  -b | --bucket)
+  -b | --bucket-name)
     BUCKET_NAME="${2-}"
     shift
     ;;
@@ -56,7 +56,7 @@ parse_params() {
   # check required params and arguments
   [[ -z "${ACCOUNT-}" ]] && die "Missing required parameter: -a/--account "
-  [[ -z "${BUCKET-}" ]] && die "Missing required parameter: -b/--bucket "
-  [[ -z "${ROLE-}" ]] && die "Missing required parameter: -r/--role "
+  [[ -z "${BUCKET_NAME-}" ]] && die "Missing required parameter: -b/--bucket-name "
+  [[ -z "${ROLE_ARN-}" ]] && die "Missing required parameter: -r/--execution-role "
   [[ -z "${USE_GRAPHBOLT-}" ]] && die "Missing required parameter: -g/--use-graphbolt "
 
   return 0
@@ -78,6 +78,7 @@ fi
 
 JOBS_TO_RUN="gconstruct train inference"
+DATASET_S3_PATH="s3://${BUCKET_NAME}/ogb-papers100M-input"
 OUTPUT_PATH="s3://${BUCKET_NAME}/pipelines-output"
 GRAPH_NAME="papers-100M"
 INSTANCE_COUNT="4"
@@ -91,7 +92,7 @@ GSF_CPU_IMAGE_URI=${ACCOUNT}.dkr.ecr.$REGION.amazonaws.com/graphstorm:sagemaker-
 GSF_GPU_IMAGE_URI=${ACCOUNT}.dkr.ecr.$REGION.amazonaws.com/graphstorm:sagemaker-gpu
 
 GCONSTRUCT_CONFIG="gconstruct_config_papers100m.json"
-GRAPH_CONSTRUCTION_ARGS="--add-reverse-edges False --num-processes 16"
+GRAPH_CONSTRUCTION_ARGS="--num-processes 16"
 
 PARTITION_OUTPUT_JSON="metadata.json"
 PARTITION_OUTPUT_JSON="$GRAPH_NAME.json"
@@ -111,7 +112,7 @@ if [[ -z "${PIPELINE_NAME-}" ]]; then
 fi
 
 python3 $SCRIPT_DIR/../../sagemaker/pipeline/create_sm_pipeline.py \
-    --execution-role "${ROLE}" \
+    --execution-role "${ROLE_ARN}" \
     --cpu-instance-type ${CPU_INSTANCE_TYPE} \
     --gpu-instance-type ${TRAIN_GPU_INSTANCE} \
     --graph-construction-args "${GRAPH_CONSTRUCTION_ARGS}" \
@@ -124,7 +125,7 @@ python3 $SCRIPT_DIR/../../sagemaker/pipeline/create_sm_pipeline.py \
     --inference-yaml-s3 "${INFERENCE_YAML_S3}" \
     --input-data-s3 "${DATASET_S3_PATH}" \
     --instance-count ${INSTANCE_COUNT} \
-    --jobs-to-run "${JOBS_TO_RUN}" \
+    --jobs-to-run ${JOBS_TO_RUN} \
     --num-trainers ${NUM_TRAINERS} \
     --output-prefix-s3 "${OUTPUT_PATH}" \
     --pipeline-name "${PIPELINE_NAME}" \
diff --git a/examples/sagemaker-pipelines-graphbolt/process_papers100M.sh b/examples/sagemaker-pipelines-graphbolt/process_papers100M.sh
index d24d5b92a8..d99def1d53 100644
--- a/examples/sagemaker-pipelines-graphbolt/process_papers100M.sh
+++ b/examples/sagemaker-pipelines-graphbolt/process_papers100M.sh
@@ -5,7 +5,6 @@ trap cleanup SIGINT SIGTERM ERR EXIT
 cleanup() {
   trap - SIGINT SIGTERM ERR EXIT
   # script cleanup here
-  kill $DISK_USAGE_PID > /dev/null 2>&1 || true
 }
 
 # Download and unzip data in parallel
@@ -16,7 +15,6 @@ cd $TEMP_DATA_PATH || exit 1
 echo "Will execute script $1 with output prefix $2"
-
 echo "$(date -u '+%Y-%m-%dT%H:%M:%SZ'): Downloading files using axel, this will take at least 10 minutes depending on network speed"
 time axel -n 16 --quiet http://snap.stanford.edu/ogb/data/nodeproppred/papers100M-bin.zip
@@ -29,6 +27,6 @@ time ripunzip unzip-file data.npz && rm data.npz
 
 # Run the processing script
 echo "$(date -u '+%Y-%m-%dT%H:%M:%SZ'): Processing data and uploading to S3, this will take around 20 minutes"
-python3 /opt/ml/code/"$1" \
+time python3 /opt/ml/code/"$1" \
     --input-dir "$TEMP_DATA_PATH/papers100M-bin/" \
     --output-prefix "$2"
diff --git a/examples/sagemaker-pipelines-graphbolt/sagemaker_convert_papers100m.py b/examples/sagemaker-pipelines-graphbolt/sagemaker_convert_papers100m.py
new file mode 100644
index 0000000000..f611070595
--- /dev/null
+++ b/examples/sagemaker-pipelines-graphbolt/sagemaker_convert_papers100m.py
@@ -0,0 +1,94 @@
+"""
+Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License").
+You may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Launch SageMaker job to convert papers100M data and prepare for input to GConstruct
+"""
+import argparse
+import os
+
+from sagemaker.processing import ScriptProcessor
+from sagemaker.network import NetworkConfig
+from sagemaker import get_execution_role
+
+_ROOT = os.path.abspath(os.path.dirname(__file__))
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse job launch arguments"""
+    parser = argparse.ArgumentParser(
+        description="Convert Papers100M dataset to GConstruct format using SageMaker Processing."
+    )
+
+    parser.add_argument(
+        "--execution-role-arn",
+        type=str,
+        default=None,
+        help="SageMaker Execution Role ARN",
+    )
+    parser.add_argument(
+        "--region", type=str, required=True, help="SageMaker Processing region."
+    )
+    parser.add_argument("--image-uri", type=str, required=True)
+    parser.add_argument(
+        "--output-bucket",
+        type=str,
+        required=True,
+        help="S3 output bucket for processed papers100M data. "
" + "Data will be saved under ``/ogb-papers100M-input/``", + ) + parser.add_argument( + "--instance-type", + type=str, + default="ml.m5.4xlarge", + help="SageMaker Processing Instance type.", + ) + + return parser.parse_args() + + +def main(): + """Launch the papers100M conversion job on SageMaker""" + args = parse_args() + + # Create a ScriptProcessor to run the processing bash script + script_processor = ScriptProcessor( + command=["bash"], + image_uri=args.image_uri, + role=args.execution_role_arn or get_execution_role(), + instance_count=1, + instance_type=args.instance_type, + volume_size_in_gb=400, + max_runtime_in_seconds=8 * 60 * 60, # Adjust as needed + base_job_name="papers100m-processing", + network_config=NetworkConfig( + enable_network_isolation=False + ), # Enable internet access to be able to download the data + ) + + # Submit the processing job + script_processor.run( + code="process_papers100M.sh", + inputs=[], + outputs=[], + arguments=[ + "convert_ogb_papers100m_to_gconstruct.py", + f"s3://{args.output_bucket}/papers-100M-input", + ], + wait=False, + ) + + +if __name__ == "__main__": + main()