# Source: https://github.com/kermitt2/grobid
# File: Dockerfile.crf
# Tip revision: 8460241600c1382f9d2b05e5c8c3d08f61d8cc81 authored by Luca Foppiano on 26 March 2024, 12:03:50 UTC ("typos")
## Docker GROBID image

## Docker GROBID image using CRF models only - NOTE: the Deep Learning image is the preferred choice and SHOULD be used instead whenever possible

## See https://grobid.readthedocs.io/en/latest/Grobid-docker/

## docker build -t grobid/grobid:GROBID_VERSION --build-arg GROBID_VERSION=GROBID_VERSION --file Dockerfile.crf .
## docker run -t --rm -p 8080:8070 -p 8081:8071 {image_name}

# To connect to the container with a bash shell
# > docker exec -i -t {container_name} /bin/bash

# -------------------
# build builder image
# -------------------
# Builder stage: runs the Gradle `assemble` task on the copied GROBID sources, then
# unpacks the grobid-service and grobid-home distribution archives under /opt/grobid.
FROM openjdk:17-jdk-slim AS builder

USER root

# unzip is required to unpack the distribution archives produced by Gradle below.
# The apt lists are removed in the same layer so they are never stored in it.
RUN apt-get update && \
    apt-get -y --no-install-recommends install unzip && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /opt/grobid-source

# gradle wrapper and build configuration
COPY gradle/ ./gradle/
COPY gradlew gradle.properties build.gradle settings.gradle ./

# source
COPY grobid-home/ ./grobid-home/
COPY grobid-core/ ./grobid-core/
COPY grobid-service/ ./grobid-service/
COPY grobid-trainer/ ./grobid-trainer/

# Cleaning unused native libraries and the Delft models before packaging.
# Done as a single step: one conceptual clean-up, one layer.
RUN rm -rf \
        grobid-home/pdf2xml \
        grobid-home/pdfalto/lin-32 \
        grobid-home/pdfalto/mac-64 \
        grobid-home/pdfalto/mac_arm-64 \
        grobid-home/pdfalto/win-* \
        grobid-home/lib/lin-32 \
        grobid-home/lib/win-* \
        grobid-home/lib/mac-64 \
        grobid-home/models/*-BidLSTM_CRF*

ENV GROBID_SERVICE_OPTS="-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep"

RUN ./gradlew clean assemble --no-daemon --info --stacktrace

WORKDIR /opt/grobid

# Unpack the service distribution and strip the version suffix from its directory name
RUN unzip -o /opt/grobid-source/grobid-service/build/distributions/grobid-service-*.zip && \
    mv grobid-service* grobid-service

# Unpack grobid-home and set mode 755 recursively on the pdfalto directory
RUN unzip -o /opt/grobid-source/grobid-home/build/distributions/grobid-home-*.zip && \
    chmod -R 755 /opt/grobid/grobid-home/pdfalto

# /opt/grobid-source is intentionally left in place: only /opt/grobid is copied out of
# this stage, so deleting the sources here would just add a layer to a discarded stage.

# -------------------
# build runtime image
# -------------------
# Runtime stage: slim JRE image holding only the unpacked GROBID service and grobid-home
# copied from the builder stage, started under tini.
FROM openjdk:17-slim

RUN apt-get update && \
    apt-get -y --no-install-recommends install libfontconfig libxml2 && \
    rm -rf /var/lib/apt/lists/*

# Add Tini as the container entrypoint (started with -s, then the CMD below).
# NOTE(review): the binary is downloaded without checksum verification; consider
# `ADD --checksum=sha256:<digest>` (BuildKit) once the release digest has been confirmed.
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "-s", "--"]

WORKDIR /opt/grobid

COPY --from=builder /opt/grobid .

# JVM options for the service launcher: native library path (lin-64 and its jep
# sub-directory) plus --add-opens for java.base/java.lang.
ENV GROBID_SERVICE_OPTS="-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep --add-opens java.base/java.lang=ALL-UNNAMED"

# Container ports mapped by the `docker run` example in the file header (-p 8080:8070 -p 8081:8071).
EXPOSE 8070 8071

# NOTE(review): no USER instruction, so the service runs as root. Switching to a
# dedicated user needs the writable paths under /opt/grobid to be confirmed first.
CMD ["./grobid-service/bin/grobid-service"]

# Declared late so that changing the version only invalidates the LABEL layer
ARG GROBID_VERSION

LABEL \
    authors="The contributors" \
    org.label-schema.name="GROBID" \
    org.label-schema.description="Image with GROBID service" \
    org.label-schema.url="https://github.com/kermitt2/grobid" \
    org.label-schema.version="${GROBID_VERSION}"
# End of Dockerfile.crf