Klimatbyran · Greenheart · Nov 28, 2024 · Nov 28, 2024 · Nov 28, 2024 · Nov 28, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,10 +1,10 @@
 node_modules
 dist
 .env
-.aider*
 .env*
+!.env.example
+.aider*
 .DS_Store
-!env.example
 prisma/generated
 *.xlsx
 

diff --git a/pdf2markdown/.env.example b/pdf2markdown/.env.example
@@ -0,0 +1,6 @@
+# This file contains the environment variables that are used in the project.
+# Copy this file to .env and fill in the values for the environment variables.
+
+NODE_ENV=development
+
+OPENAI_API_KEY=
diff --git a/pdf2markdown/.gitignore b/pdf2markdown/.gitignore
@@ -0,0 +1,10 @@
+node_modules
+dist
+.env
+.env*
+!.env.example
+.aider*
+.DS_Store
+
+*.pdf
+*.code-workspace
diff --git a/pdf2markdown/.python-version b/pdf2markdown/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/pdf2markdown/Dockerfile b/pdf2markdown/Dockerfile
@@ -0,0 +1,47 @@
+FROM python:3.12-slim-bookworm
+# CPU_ONLY=true makes the PyTorch step further down add the CPU wheel index.
+ARG CPU_ONLY=false
+
+WORKDIR /app
+# OS packages; skip recommended extras and remove the apt lists in this same layer to keep it small.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates curl git libgl1 libglib2.0-0 wget \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Poetry (without keeping pip's download cache) and tell it not to create a virtualenv
+RUN pip install --no-cache-dir poetry \
+    && poetry config virtualenvs.create false
+
+COPY pyproject.toml poetry.lock ./
+
+# Install dependencies (not the project itself) before torch, then drop Poetry's cache in the same layer
+RUN poetry install --no-interaction --no-root && rm -rf /root/.cache/pypoetry
+
+# Install PyTorch separately, keyed on the CPU_ONLY build arg:
+# "true" adds the CPU wheel index, any other value installs from the cu121 wheel index.
+# TODO: Use correct GPU build - see https://pytorch.org/ for details
+RUN case "$CPU_ONLY" in \
+    true) pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu ;; \
+    *) pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 ;; \
+    esac
+
+ENV HF_HOME=/tmp/ \
+    TORCH_HOME=/tmp/ \
+    OMP_NUM_THREADS=4
+# Run docling's StandardPdfPipeline.download_models_hf(force=True) during the build so whatever it writes is stored in this layer
+RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; StandardPdfPipeline.download_models_hf(force=True)'
+
+# Install Node.js 22; the setup script is saved to a file first so a failed download aborts the build instead of being hidden by a pipe
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x -o /tmp/nodesource_setup.sh && bash /tmp/nodesource_setup.sh \
+    && apt-get install -y --no-install-recommends nodejs && rm -rf /tmp/nodesource_setup.sh /var/lib/apt/lists/*
+
+# Install app dependencies (production only, from the lockfile) and drop npm's cache in the same layer
+COPY package*.json ./
+RUN npm ci --omit=dev && npm cache clean --force
+
+# Copy app source (NOTE(review): confirm a .dockerignore keeps .env, node_modules and *.pdf out of the build context)
+COPY . .
+
+EXPOSE 3000
+# Exec form: no /bin/sh -c wrapper, so npm runs as PID 1 and receives the stop signal directly
+CMD ["npm", "start"]