diff --git a/fedlearner-sgx-dev.dockerfile b/fedlearner-sgx-dev.dockerfile index 56e6a9d8e..915c70c09 100644 --- a/fedlearner-sgx-dev.dockerfile +++ b/fedlearner-sgx-dev.dockerfile @@ -6,6 +6,8 @@ ENV DEBIAN_FRONTEND=noninteractive ENV INSTALL_PREFIX=/usr/local ENV LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:${INSTALL_PREFIX}/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH} ENV PATH=${INSTALL_PREFIX}/bin:${LD_LIBRARY_PATH}:${PATH} +# For Gramine RA-TLS +ENV PYTHONDONTWRITEBYTECODE=1 # Add steps here to set up common dependencies RUN apt-get update \ @@ -60,8 +62,8 @@ RUN apt-get install -y libcurl4-openssl-dev libprotobuf-c-dev python3-protobuf p RUN apt-get install -y libgmp-dev libmpfr-dev libmpc-dev libisl-dev nasm protobuf-compiler RUN ln -s /usr/bin/python3 /usr/bin/python \ - && pip3 install --upgrade pip \ - && pip3 install toml meson pyelftools + && pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \ + && pip3 install --no-compile toml meson pyelftools -i https://mirrors.aliyun.com/pypi/simple/ RUN git clone https://github.com/analytics-zoo/gramine ${GRAMINEDIR} \ && cd ${GRAMINEDIR} \ @@ -101,8 +103,8 @@ ENV GRPC_VERSION=v1.38.1 RUN git clone --recurse-submodules -b ${GRPC_VERSION} https://github.com/grpc/grpc ${GRPC_PATH} -RUN pip3 install --upgrade pip \ - && pip3 install -r ${GRPC_PATH}/requirements.txt +RUN pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \ + && pip3 install --no-compile -r ${GRPC_PATH}/requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ # Tensorflow dependencies ENV BAZEL_VERSION=3.1.0 @@ -110,8 +112,8 @@ ENV TF_VERSION=v2.4.2 ENV TF_BUILD_PATH=/tf/src ENV TF_BUILD_OUTPUT=/tf/output -RUN pip3 install --upgrade pip \ - && pip3 install numpy keras_preprocessing +RUN pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \ + && pip3 install --no-compile numpy keras_preprocessing -i https://mirrors.aliyun.com/pypi/simple/ RUN wget 
"https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel_${BAZEL_VERSION}-linux-x86_64.deb" \ && dpkg -i bazel_*.deb @@ -127,7 +129,7 @@ RUN apt-get install -y libmysqlclient-dev COPY sgx/grpc/common ${GRPC_PATH} COPY sgx/grpc/v1.38.1 ${GRPC_PATH} -RUN pip3 install 'cython==0.29.36' +RUN pip3 install --no-compile 'cython==0.29.36' -i https://mirrors.aliyun.com/pypi/simple/ RUN ${GRPC_PATH}/build_python.sh # Build tensorflow @@ -150,17 +152,17 @@ RUN if [ -f ${FEDLEARNER_PATH}/docker/hadoop-mt-2.7.0.tar.gz ]; then mkdir -p /o # For meituan hadoop auth RUN apt-get install -y libkrb5-dev openjdk-8-jdk -RUN pip3 install --upgrade pip \ - && pip3 install -r ${FEDLEARNER_PATH}/requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ +RUN pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \ + && pip3 install --no-compile -r ${FEDLEARNER_PATH}/requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ RUN cd ${FEDLEARNER_PATH} \ && make protobuf \ && python3 setup.py bdist_wheel \ - && pip3 install ./dist/*.whl + && pip3 install --no-compile ./dist/*.whl # Re-install tensorflow, uninstall tensorflow_io, mock it RUN pip3 uninstall -y tensorflow tensorflow-io \ - && pip3 install ${TF_BUILD_OUTPUT}/*.whl + && pip3 install --no-compile ${TF_BUILD_OUTPUT}/*.whl # Re-install fedlearner plugin RUN cd ${FEDLEARNER_PATH} \ @@ -170,7 +172,7 @@ RUN cd ${FEDLEARNER_PATH} \ # Re-install grpcio RUN pip3 uninstall -y grpcio \ - && pip3 install ${GRPC_PATH}/dist/grpcio*.whl + && pip3 install --no-compile ${GRPC_PATH}/dist/grpcio*.whl # For debug RUN apt-get install -y strace gdb ctags vim @@ -182,6 +184,11 @@ COPY sgx/configs / RUN echo "enabled=0" > /etc/default/apport RUN echo "exit 0" > /usr/sbin/policy-rc.d +# For gramine ra-tls +RUN dpkg --remove --force-depends libgtk2.0-0 \ + && pip3 uninstall -y numpy keras_preprocessing protobuf \ + && pip3 install --no-compile numpy keras_preprocessing protobuf -i 
https://mirrors.aliyun.com/pypi/simple/ + # Clean tmp files RUN apt-get clean all \ && rm -rf /var/lib/apt/lists/* \ diff --git a/fedlearner/trainer/trainer_worker.py b/fedlearner/trainer/trainer_worker.py index 4827272b6..caa5bf292 100644 --- a/fedlearner/trainer/trainer_worker.py +++ b/fedlearner/trainer/trainer_worker.py @@ -232,6 +232,33 @@ def _run_master(role, cluster_server = ClusterServer(cluster_spec, "master", server_port=args.server_port) + # Use Meituan Hadoop. + # First: convert the Meituan HDFS path to a local storage path; if the PSI result file already exists locally, use the local file. + # Second: if the PSI result file does not exist locally, download it from Meituan HDFS to local storage. + if args.using_mt_hadoop: + data_path = args.data_path + if data_path: + local_data_path = get_local_temp_path(data_path) + if not exists(local_data_path): + data_path = mt_hadoop_download(data_path) + else: + data_path = local_data_path + args.data_path = data_path + + checkpoint_path = args.checkpoint_path + if checkpoint_path: + args.checkpoint_path = get_local_temp_path(checkpoint_path) + + load_checkpoint_path = args.load_checkpoint_path + if load_checkpoint_path: + args.load_checkpoint_path = get_local_temp_path(load_checkpoint_path) + if not exists(args.load_checkpoint_path): + mt_hadoop_download(load_checkpoint_path) + + export_path = args.export_path + if export_path: + args.export_path = get_local_temp_path(export_path) + checkpoint_filename_with_path = _get_checkpoint_filename_with_path(args) data_visitor = _create_data_visitor(args) master_factory = LeaderTrainerMaster \ @@ -479,33 +506,6 @@ def train(role, if not isinstance(role, str) or role.lower() not in (LEADER, FOLLOER): raise ValueError("--role must set one of %s or %s"%(LEADER, FOLLOER)) - # use Meituan hadoop - # first:convert Meituan HDFS path to local storage path, if local exit psi result file, user local file - # second:if local not exit psi result file,from Meituan HDFS download to local - if args.using_mt_hadoop: - data_path =
args.data_path - if data_path: - local_data_path = get_local_temp_path(data_path) - if not exists(local_data_path): - data_path = mt_hadoop_download(data_path) - else: - data_path = local_data_path - args.data_path = data_path - - checkpoint_path = args.checkpoint_path - if checkpoint_path: - args.checkpoint_path = get_local_temp_path(checkpoint_path) - - load_checkpoint_path = args.load_checkpoint_path - if load_checkpoint_path: - args.load_checkpoint_path = get_local_temp_path(load_checkpoint_path) - if not exists(args.load_checkpoint_path): - mt_hadoop_download(load_checkpoint_path) - - export_path = args.export_path - if export_path: - args.export_path = get_local_temp_path(export_path) - if args.loglevel: fl_logging.set_level(args.loglevel) diff --git a/sgx/gramine/CI-Examples/generate-token/python.manifest.template b/sgx/gramine/CI-Examples/generate-token/python.manifest.template index 573bbd4c0..185e75f55 100644 --- a/sgx/gramine/CI-Examples/generate-token/python.manifest.template +++ b/sgx/gramine/CI-Examples/generate-token/python.manifest.template @@ -67,12 +67,75 @@ sgx.trusted_files = [ "file:/usr/{{ arch_libdir }}/", "file:/etc/ssl/certs/ca-certificates.crt", "file:/etc/default/apport", - "file:/usr/local/lib/", - "file:{{ python.stdlib }}/", - "file:{{ python.distlib }}/", "file:/etc/mime.types", "file:/gramine/leader/", - "file:/gramine/follower/" + "file:/gramine/follower/", + "file:/usr/local/lib/x86_64-linux-gnu/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/libtensorflow_framework.so.2", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/__init__.py", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/__internal__/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/__operators__/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/audio/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/autodiff/", + 
"file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/autograph/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/bitwise/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/compat/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/config/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/data/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/debugging/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/distribute/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/dtypes/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/errors/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/experimental/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/feature_column/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/graph_util/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/image/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/io/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/linalg/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/lite/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/lookup/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/math/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/mixed_precision/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/mlir/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/nest/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/nn/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/profiler/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/quantization/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/queue/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/ragged/", 
+ "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/random/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/raw_ops/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/saved_model/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/sets/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/signal/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/sparse/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/strings/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/summary/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/sysconfig/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/test/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/tpu/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/train/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/types/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/version/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/_api/v2/xla/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/compiler/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/core/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/include/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/keras/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/python/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/tools/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/lite/experimental/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/lite/toco/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow/xla_aot_runtime_src/", + "file:/usr/local/lib/python3.6/dist-packages/pyspark/", + "file:/usr/local/lib/python3.6/dist-packages/Cython/", + "file:/usr/local/lib/python3.6/dist-packages/numpy/", + 
"file:/usr/local/lib/python3.6/dist-packages/grpc/", + "file:/usr/local/lib/python3.6/dist-packages/google/", + "file:/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/", + "file:/usr/local/lib/python3.6/dist-packages/sklearn/", + "file:/usr/local/lib/python3.6/dist-packages/fedlearner/", + "file:/usr/local/lib/python3.6/dist-packages/pandas/" ] sgx.allowed_files = [ @@ -81,6 +144,9 @@ sgx.allowed_files = [ "file:/opt/meituan/", "file:/usr/lib/ssl/openssl.cnf", "file:/usr/lib/gcc", + "file:/usr/local/lib/", + "file:{{ python.stdlib }}/", + "file:{{ python.distlib }}/", "file:/etc/ethers", "file:/etc/hosts", "file:/etc/group", @@ -103,4 +169,4 @@ sgx.allowed_files = [ "file:/lib/", "file:/bin/", "file:/data/", -] \ No newline at end of file +]