# RAG-Scraper / Dockerfile
# CultriX's picture
# Update Dockerfile
# 2909a76 verified
# Pin to Debian 12 (bookworm) so the wkhtmltox bookworm package exists.
FROM python:3.10-bookworm

# Build-time only: keeps apt/debconf from prompting during RUN steps.
# Declared as ARG (not ENV) so it is visible to every RUN in this stage but is
# not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime Python settings:
#   PYTHONDONTWRITEBYTECODE=1  do not write .pyc files
#   PYTHONUNBUFFERED=1         unbuffered stdout/stderr so logs appear immediately
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# All following relative paths (COPY destinations, CMD) resolve against /app.
WORKDIR /app
# System packages: download/VCS tooling used by later build steps, plus the
# fonts, image libraries and X11 client libraries that wkhtmltopdf needs.
# One package per line, sorted, so future diffs stay small. The apt lists are
# removed in the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        fontconfig \
        fonts-dejavu-core \
        git \
        gnupg \
        libfreetype6 \
        libjpeg62-turbo \
        libpng16-16 \
        libx11-6 \
        libxcb1 \
        libxext6 \
        libxrender1 \
        xz-utils \
    && rm -rf /var/lib/apt/lists/*
# Install wkhtmltopdf (bookworm build) from the upstream packaging releases.
# WKHTML_VER is the release tag; override with --build-arg WKHTML_VER=...
ARG WKHTML_VER=0.12.6.1-3
# The .deb is selected by the image's own dpkg architecture instead of a
# hard-coded "amd64", so the same Dockerfile builds on other platforms for
# which upstream publishes a bookworm package. curl -f makes the build fail
# loudly if no asset exists for that architecture. Installing through apt-get
# (rather than dpkg -i) lets apt resolve the package's dependencies.
RUN arch="$(dpkg --print-architecture)" \
    && curl -fsSL -o /tmp/wkhtml.deb \
        "https://github.com/wkhtmltopdf/packaging/releases/download/${WKHTML_VER}/wkhtmltox_${WKHTML_VER}.bookworm_${arch}.deb" \
    && apt-get update \
    && apt-get install -y --no-install-recommends /tmp/wkhtml.deb \
    && rm -f /tmp/wkhtml.deb \
    && rm -rf /var/lib/apt/lists/*
# Smoke test: fail the build here if the binary cannot start.
RUN wkhtmltopdf --version
# Node.js LTS (for repomix).
# The NodeSource setup script is downloaded to a file and then executed rather
# than piped into bash: with the default /bin/sh there is no pipefail, so a
# failed download in a pipe would go unnoticed and the step would continue with
# whatever nodejs package the distribution itself provides.
RUN curl -fsSL -o /tmp/nodesource_setup.sh https://deb.nodesource.com/setup_lts.x \
    && bash /tmp/nodesource_setup.sh \
    && rm -f /tmp/nodesource_setup.sh \
    && apt-get update \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# repomix (global CLI).
# REPOMIX_VERSION defaults to "latest" (same result as an unversioned install);
# pass --build-arg REPOMIX_VERSION=<x.y.z> for a reproducible build.
# The npm cache is cleared in the same layer so it does not bloat the image.
ARG REPOMIX_VERSION=latest
RUN npm install -g "repomix@${REPOMIX_VERSION}" \
    && npm cache clean --force
# Poetry, via the official installer script.
# Downloaded with -f and run from a file instead of `curl | python3 -`: in a
# pipe, an HTTP error or failed download is not detected at this step and only
# surfaces later as a missing `poetry` command.
RUN curl -fsSL -o /tmp/install-poetry.py https://install.python-poetry.org \
    && python3 /tmp/install-poetry.py \
    && rm -f /tmp/install-poetry.py
# Make the `poetry` command (under root's ~/.local/bin) available on PATH.
ENV PATH="/root/.local/bin:$PATH"
# Install dependencies into the image's Python directly; no virtualenv needed
# inside a container.
RUN poetry config virtualenvs.create false
# Dependencies first for better layer caching: only the manifests are copied
# here, so this layer is rebuilt only when poetry.lock / pyproject.toml change.
COPY poetry.lock pyproject.toml /app/
# --no-root: install the dependencies only, not the project itself (its source
# has not been copied yet).
RUN poetry install --no-root --no-interaction --no-ansi
# The requirement is quoted so the shell never treats [mcp] as a glob pattern,
# and --no-cache-dir keeps pip's download cache out of the layer.
# NOTE(review): this installs gradio outside the Poetry lock file and may
# change versions Poetry just installed -- consider adding it to pyproject.toml.
RUN pip install --no-cache-dir "gradio[mcp]"
# app
# Run as an unprivileged user instead of root. The user owns /app so the
# application can still create files in its working directory.
# NOTE(review): UID 1000 is chosen to match what Hugging Face Spaces uses for
# Docker containers, presumably the deployment target -- confirm. Also confirm
# the app needs nothing under /root at runtime (e.g. the poetry CLI).
RUN useradd --create-home --uid 1000 app \
    && chown app:app /app
COPY --chown=app:app . .
USER app

# Documentation only; the port still has to be published at `docker run`.
EXPOSE 7860
# Gradio settings: listen on all interfaces (required to be reachable from
# outside the container), on the exposed port, with the MCP server enabled.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    GRADIO_MCP_SERVER=True
# Exec form so the Python process is PID 1 and receives stop signals directly.
CMD ["python", "app.py"]