// https://github.com/kermitt2/grobid
// Raw File
// Tip revision: 8460241600c1382f9d2b05e5c8c3d08f61d8cc81 authored by Luca Foppiano on 26 March 2024, 12:03:50 UTC
// typos
// Tip revision: 8460241
// build.gradle
// Classpath of the build script itself: the third-party Gradle plugins that are
// applied further down with `apply plugin:` are resolved from here.
buildscript {
    repositories {
        mavenLocal()
        mavenCentral()
        // Gradle plugin portal, addressed as a plain Maven repository
        maven { url 'https://plugins.gradle.org/m2/' }
    }

    dependencies {
        classpath('gradle.plugin.org.kt3k.gradle.plugin:coveralls-gradle-plugin:2.12.0')
        classpath('gradle.plugin.com.github.jengelman.gradle.plugins:shadow:7.0.0')
        classpath('com.adarshr:gradle-test-logger-plugin:2.0.0')
    }
}

// Repositories used to resolve dependencies of the root project itself.
repositories {
    mavenLocal()
    mavenCentral()
}

// JaCoCo on the root project, pinned to a fixed tool version.
apply(plugin: 'jacoco')

jacoco.toolVersion = '0.8.8'

// Settings shared by the root project and every subproject.
allprojects {
    apply(plugin: 'java-library')
    apply(plugin: 'base')
    apply(plugin: 'com.github.kt3k.coveralls')
    apply(plugin: 'com.adarshr.test-logger')

    group = "org.grobid"

    // Every Java compilation reads sources as UTF-8.
    tasks.withType(JavaCompile) { compileTask ->
        compileTask.options.encoding = 'UTF-8'
        // note (kept from the original author): the following is reported as not working
        compileTask.options.compilerArgs.add('-parameters')
    }
}

subprojects {
    apply(plugin: 'java')
    apply(plugin: 'maven-publish')

    // Each subproject publishes its Java component; the only publishing target
    // declared here is the local Maven repository.
    publishing {
        publications {
            mavenJava(MavenPublication) {
                from(components.java)
                //artifact jar
            }
        }
        repositories {
            mavenLocal()
        }
    }

    // Compile for Java 11. The numeric literal 1.11 was only understood as "Java 11"
    // through Gradle's legacy "1.x" version parsing; the JavaVersion constant states
    // the same level explicitly.
    sourceCompatibility = JavaVersion.VERSION_11
    targetCompatibility = JavaVersion.VERSION_11

    // Where subproject dependencies are resolved from.
    repositories {
        mavenCentral()
        // the grobid-core/localLibs folder of the source tree, used as a Maven repository
        maven { url(rootProject.file("grobid-core/localLibs")) }
        maven { url("https://jitpack.io") }
    }

    configurations {
        // Exclude the log4j 1.x artifacts (the slf4j-log4j12 binding and log4j itself)
        // from the configurations of each subproject.
        all*.exclude group: 'org.slf4j', module: "slf4j-log4j12"
        all*.exclude group: 'log4j', module: "log4j"
        // Allow the `implementation` configuration to be resolved directly.
        implementation.setCanBeResolved(true)
    }

    // Jars from grobid-core/localLibs that are treated separately: on installing they are
    // flattened into grobid-core.jar, to avoid missing dependencies in the projects that
    // include grobid-core (see the 'jar' task in grobid-core).
    ext.localLibs = [
        'crfpp-1.0.2.jar',
        'langdetect-1.1-20120112.jar',
        'wipo-analysers-0.0.2.jar',
        'imageio-pnm-1.0.jar',
        'wapiti-1.5.0.jar'
    ]

    // Dependencies common to all subprojects (declaration order kept as-is).
    dependencies {
        // local jars that end up packaged inside grobid-core.jar
        implementation(fileTree(dir: new File(rootProject.rootDir, 'grobid-core/localLibs'), include: localLibs))

        // --- testing ---
        testRuntimeOnly('org.junit.vintage:junit-vintage-engine:5.9.3')
        testImplementation(platform('org.junit:junit-bom:5.9.3'))
        testImplementation('org.junit.jupiter:junit-jupiter')
        testImplementation('org.easymock:easymock:5.1.0')
        testImplementation('org.powermock:powermock-api-easymock:2.0.7')
        testImplementation('org.powermock:powermock-module-junit4:2.0.7')
        testImplementation('xmlunit:xmlunit:1.6')
        testImplementation('org.hamcrest:hamcrest-all:1.3')

        // --- general-purpose libraries ---
        implementation('com.cybozu.labs:langdetect:1.1-20120112')
        implementation('com.rockymadden.stringmetric:stringmetric-core_2.11:0.27.4')
        implementation('commons-pool:commons-pool:1.6')
        implementation('commons-io:commons-io:2.5')
        implementation('org.apache.commons:commons-lang3:3.6')
        implementation('org.apache.commons:commons-collections4:4.1')
        implementation('org.apache.commons:commons-text:1.11.0')
        implementation('commons-dbutils:commons-dbutils:1.7')
        implementation('com.google.guava:guava:31.0.1-jre')
        implementation('org.apache.httpcomponents:httpclient:4.5.3')
        implementation('black.ninia:jep:4.0.2')

        // --- Jackson (all modules on the same version) ---
        implementation('com.fasterxml.jackson.core:jackson-core:2.14.3')
        implementation('com.fasterxml.jackson.core:jackson-databind:2.14.3')
        implementation('com.fasterxml.jackson.module:jackson-module-afterburner:2.14.3')
        implementation('com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.14.3')
    }

    // Companion jar (classifier 'sources') built from the Java sources of the main source set.
    tasks.create(name: 'sourceJar', type: Jar) {
        archiveClassifier = 'sources'
        description = 'A jar that contains source code'
        from(project.sourceSets.main.java)
    }

    // Both the sources jar and the regular jar are registered as `archives` artifacts.
    artifacts {
        archives(sourceJar)
        archives(jar)
    }

    //compileJava.dependsOn(changeVersionIfNeeded)

//    uploadArchives {
//        // if you want to enable uploading to some maven repo, add those properties to ~/.gradle/gradle.properties, e.g.:
//        /*
//            mavenRepoUserName=maven_username
//            mavenRepoPassword=super_secret
//            mavenRepoReleasesUrl=https://nexus3.example.org/repository/maven-releases/
//            mavenRepoSnapshotsUrl=https://nexus3.example.org/repository/maven-snapshots/
//        */
//        def user = project.hasProperty('mavenRepoUserName') ? project.findProperty('mavenRepoUserName') : ''
//        def password = project.hasProperty('mavenRepoPassword') ? project.findProperty('mavenRepoPassword') : ''
//        def rurl = project.hasProperty('mavenRepoReleasesUrl') ? project.findProperty('mavenRepoReleasesUrl') : ''
//        def surl = project.hasProperty('mavenRepoSnapshotsUrl') ? project.findProperty('mavenRepoSnapshotsUrl') : ''
//
//        repositories.mavenDeployer {
//            repository(url: rurl) {
//                authentication(userName: user, password: password)
//            }
//            snapshotRepository(url: surl) {
//                authentication(userName: user, password: password)
//            }
//
//        }
//    }

    // Test execution settings shared by all subprojects: JUnit Platform, one forked JVM per
    // test class, and the platform's native libraries from <root>/grobid-home/lib appended
    // to java.library.path.
    test {
        useJUnitPlatform()

        testLogging.showStandardStreams = true
        // enable for having separate test executor for different tests
        forkEvery = 1
        maxHeapSize = "1024m"

        // All native library folders are resolved against the ROOT project. The macOS branch
        // used to call file("./grobid-home/..."), which resolves against the current
        // subproject directory (e.g. grobid-core/grobid-home/...) and therefore did not
        // agree with the Linux branch, which already used the root directory.
        def libraries = ""
        if (Os.isFamily(Os.FAMILY_MAC)) {
            // must be checked before FAMILY_UNIX
            if (Os.OS_ARCH.equals("aarch64")) {
                libraries = rootProject.file("grobid-home/lib/mac_arm-64").absolutePath
            } else {
                libraries = rootProject.file("grobid-home/lib/mac-64").absolutePath
            }
        } else if (Os.isFamily(Os.FAMILY_UNIX)) {
            libraries = rootProject.file("grobid-home/lib/lin-64/jep").absolutePath +
                ":" + rootProject.file("grobid-home/lib/lin-64").absolutePath
        } else {
            throw new RuntimeException("Unsupported platform!")
        }

        // On Java 9+ the listed packages are opened to the unnamed module for the forked test JVM.
        if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
            jvmArgs "--add-opens", "java.base/java.util.stream=ALL-UNNAMED",
                "--add-opens", "java.base/java.io=ALL-UNNAMED", "--add-opens", "java.xml/jdk.xml.internal=ALL-UNNAMED"
        }
        systemProperty "java.library.path", "${System.getProperty('java.library.path')}:" + libraries
    }
}

/** SUBPROJECTS **/

project("grobid-core") {
    apply(plugin: 'com.github.johnrengelman.shadow')
    apply(plugin: 'jacoco')

    configurations {
        // extra configuration handed to shadowJar in addition to runtimeClasspath (see below)
        shadedLib
    }

    dependencies {
        implementation(group: 'xml-apis', name: 'xml-apis') {
            // otherwise xml-apis 2.0.1 will come from XOM and will result in
            // java.lang.ClassNotFoundException: org.w3c.dom.ElementTraversal
            //TODO: sort out this problem better
            version {
                strictly('1.4.01')
            }
        }

        // Logs
        implementation('org.slf4j:slf4j-api:1.7.30')
        implementation('ch.qos.logback:logback-classic:1.2.3')

        implementation('org.apache.pdfbox:pdfbox:2.0.18')

        // exposed to consumers of grobid-core
        api('xerces:xercesImpl:2.12.0')
        api('net.arnx:jsonic:1.3.10')
        api('net.sf.saxon:Saxon-HE:9.6.0-9')
        api('xom:xom:1.3.2')
        api('javax.xml.bind:jaxb-api:2.3.0')

        implementation('joda-time:joda-time:2.9.9')
        implementation('org.apache.lucene:lucene-analyzers-common:4.5.1')
        implementation('black.ninia:jep:4.0.2')
        implementation('org.apache.opennlp:opennlp-tools:1.9.1')
        implementation(group: 'org.jruby', name: 'jruby-complete', version: '9.2.13.0')

        shadedLib('org.apache.lucene:lucene-analyzers-common:4.5.1')
    }

    // Regular jar: the content of the jars listed in `localLibs` is unpacked into it.
    jar {
        from {
            project.configurations.runtimeClasspath.collect { entry ->
                (!entry.isDirectory() && localLibs.contains(entry.getName())) ? zipTree(entry) : []
            }
        }
        exclude("logback.xml")
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    // Self-contained 'onejar' archive with GrobidMain as entry point; Lucene classes are
    // relocated under org.grobid.shaded.
    shadowJar {
        archiveClassifier = 'onejar'
        mergeServiceFiles()
        zip64 = true
        manifest {
            attributes('Main-Class': 'org.grobid.core.main.batch.GrobidMain')
        }
        from(sourceSets.main.output)

        from {
            project.configurations.runtimeClasspath.collect { entry ->
                (!entry.isDirectory() && localLibs.contains(entry.getName())) ? zipTree(entry) : []
            }
        }

        configurations = [project.configurations.shadedLib, project.configurations.runtimeClasspath]
        relocate('org.apache.lucene', 'org.grobid.shaded.org.apache.lucene')
    }

    artifacts {
        archives(jar)
        archives(shadowJar)
    }

    // Replace the 'project.version' placeholder in grobid-version.txt with the project version.
    processResources {
        filesMatching('grobid-version.txt') {
            filter { line ->
                line.replace('project.version', project.property('version'))
            }
        }
    }

    // Convenience task: publish to the local Maven repository and build the onejar.
    task install {
        dependsOn(publishToMavenLocal, 'shadowJar')
    }
}

project("grobid-home") {
    // Zip of the grobid-home resources; every entry is placed under a top-level
    // "grobid-home" directory inside the archive.
    task packageGrobidHome(type: Zip) {
        zip64 = true
        from('.') {
            include(
                "config/*",
                "language-detection/**",
                "sentence-segmentation/**",
                "lib/**",
                "pdfalto/**",
                "models/**",
                "lexicon/**",
                "schemas/**",
                "scripts/**"
            )
            // model files with the .old extension are left out
            exclude("models/**/*.old")
        }
        into("grobid-home")
    }

    artifacts {
        archives(packageGrobidHome)
    }
}

import org.apache.tools.ant.taskdefs.condition.Os

project(":grobid-service") {
    apply(plugin: 'application')
    apply(plugin: 'jacoco')
    apply(plugin: 'com.github.johnrengelman.shadow')

    // Main class used by the application plugin.
    mainClassName = 'org.grobid.service.main.GrobidServiceApplication'

    // `run` task: started from the repository root, with the platform's native library
    // folders from grobid-home/lib appended to java.library.path.
    tasks.run {
        def nativeLibs
        if (Os.isFamily(Os.FAMILY_MAC)) {
            // checked before FAMILY_UNIX; aarch64 has its own folder
            def macFolder = Os.OS_ARCH.equals("aarch64") ? "../grobid-home/lib/mac_arm-64" : "../grobid-home/lib/mac-64"
            nativeLibs = "${file(macFolder).absolutePath}"
        } else if (Os.isFamily(Os.FAMILY_UNIX)) {
            def jepFolder = file("../grobid-home/lib/lin-64/jep").absolutePath
            def linuxFolder = file("../grobid-home/lib/lin-64").absolutePath
            nativeLibs = "${jepFolder}:" + "${linuxFolder}:"
        } else {
            throw new RuntimeException("Unsupported platform!")
        }

        // Java 9+: open java.lang to the unnamed module
        if (JavaVersion.current().isJava9Compatible()) {
            jvmArgs("--add-opens", "java.base/java.lang=ALL-UNNAMED")
        }
        workingDir = rootProject.rootDir
        def javaLibraryPath = "${System.getProperty('java.library.path')}:" + nativeLibs
//        if (System.env.CONDA_PREFIX) {
//            def condaEnv = "${System.env.CONDA_PREFIX}/lib"
//            def pythonDirectory = file(condaEnv).listFiles({ it.toString().contains("/lib/python") } as FileFilter)?.first()
//            def pythonVersion = (pythonDirectory =~ /python([0-9]\.[0-9]+)/)[0][1]
//
//            javaLibraryPath = "${System.getProperty('java.library.path')}:" +
//                libraries + ":" +
//                "${System.env.CONDA_PREFIX}/lib:" +
//                "${System.env.CONDA_PREFIX}/lib/python${pythonVersion}/site-packages/jep"
//        }
        systemProperty("java.library.path", javaLibraryPath)
    }

    // Of the distribution archives only distZip is enabled; distTar and both shadow
    // distributions are switched off. Duplicate entries are skipped in distZip/distTar.
    distZip {
        enabled = true
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }
    distTar {
        enabled = false
        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }
    //tasks.distZip.zip64 = true
    tasks.shadowDistZip.enabled = false
    tasks.shadowDistTar.enabled = false

    // Dependencies of the REST service.
    dependencies {
        implementation project(':grobid-core')
        implementation project(':grobid-trainer')

        //Dropwizard
        implementation 'ru.vyarus:dropwizard-guicey:7.0.0'

        // NOTE(review): the BOM is declared as a plain dependency, not via platform(...);
        // confirm whether its version constraints are meant to be applied.
        implementation 'io.dropwizard:dropwizard-bom:4.0.0'
        implementation 'io.dropwizard:dropwizard-core:4.0.0'
        implementation 'io.dropwizard:dropwizard-assets:4.0.0'
        implementation 'io.dropwizard:dropwizard-testing:4.0.0'
        implementation 'io.dropwizard.modules:dropwizard-testing-junit4:4.0.0'
        implementation 'io.dropwizard:dropwizard-forms:4.0.0'
        implementation 'io.dropwizard:dropwizard-client:4.0.0'
        implementation 'io.dropwizard:dropwizard-auth:4.0.0'
        implementation 'io.dropwizard.metrics:metrics-core:4.2.22'
        implementation 'io.dropwizard.metrics:metrics-servlets:4.2.22'

        // Same pdfbox version as grobid-core (2.0.18). The service previously declared 2.0.3,
        // so it compiled against a different version than the one grobid-core brings along
        // on the runtime classpath.
        implementation "org.apache.pdfbox:pdfbox:2.0.18"
        implementation "javax.activation:activation:1.1.1"
        implementation "io.prometheus:simpleclient_dropwizard:0.16.0"
        implementation "io.prometheus:simpleclient_servlet:0.16.0"
    }

    // 'onejar' archive of the service; logback.xml is left out and duplicate entries are skipped.
    // NOTE(review): the manifest Main-Class is the batch entry point GrobidMain, while
    // mainClassName of this project is GrobidServiceApplication - confirm this is intended.
    shadowJar {
        archiveClassifier = 'onejar'
        mergeServiceFiles()
        zip64 = true
        manifest {
            attributes('Main-Class': 'org.grobid.core.main.batch.GrobidMain')
        }

        exclude("logback.xml")

        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    artifacts {
        archives(shadowJar)
    }

    // Extra content of the service distribution.
    distributions {
        main {
            contents {
                //from(new File(rootProject.rootDir, "grobid-service/README.md")) {
                //    into "doc"
                //}
                // grobid-home lives inside the root project (as everywhere else in this build);
                // the former "../grobid-home/..." path pointed outside the repository, so the
                // configuration file was silently missing from the distribution.
                from(new File(rootProject.rootDir, "grobid-home/config/grobid.yaml")) {
                    into "config"
                }
                // NOTE(review): java.io.File does not expand the "*" wildcard, so this entry
                // presumably matches nothing; left unchanged because the application plugin is
                // expected to provide the start scripts itself - confirm before touching.
                from(new File(rootProject.rootDir, "grobid-service/build/scripts/*")) {
                    into "bin"
                }
            }
        }
    }
}

project(":grobid-trainer") {
    apply plugin: 'com.github.johnrengelman.shadow'
    apply plugin: 'jacoco'

    // Dependencies specific to the trainer module.
    dependencies {
        implementation(group: 'xml-apis', name: 'xml-apis') {
            // otherwise xml-apis 2.0.1 will come from XOM and will result in
            // java.lang.ClassNotFoundException: org.w3c.dom.ElementTraversal
            //TODO: sort out this problem better
            version {
                strictly '1.4.01'
            }
        }
        implementation project(':grobid-core')
        // Same artifact and version as declared for all subprojects. The trainer previously
        // also pulled stringmetric-core_2.10:0.27.3, putting two Scala binary versions of the
        // same library on one classpath.
        implementation "com.rockymadden.stringmetric:stringmetric-core_2.11:0.27.4"
        implementation "me.tongfei:progressbar:0.9.0"

        // logs
        implementation 'org.slf4j:slf4j-api:1.7.30'
        implementation 'ch.qos.logback:logback-classic:1.2.3'
    }

    configurations {
    }

    // Regular jar: the content of the jars listed in `localLibs` is unpacked into it.
    jar {
        from {
            project.configurations.runtimeClasspath.collect { entry ->
                (!entry.isDirectory() && localLibs.contains(entry.getName())) ? zipTree(entry) : []
            }
        }
        exclude("logback.xml")

        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    // 'onejar' archive with TrainerRunner as entry point; the *.xml files found directly
    // under src/main/resources are added explicitly.
    shadowJar {
        archiveClassifier = 'onejar'
        mergeServiceFiles()
        zip64 = true
        manifest {
            attributes('Main-Class': 'org.grobid.trainer.TrainerRunner')
        }

        from('src/main/resources') {
            include('*.xml')
        }

        duplicatesStrategy = DuplicatesStrategy.EXCLUDE
    }

    artifacts {
        archives(shadowJar)
        archives(jar)
    }

    // Convenience task: publish to the local Maven repository and build the onejar.
    task install {
        dependsOn(publishToMavenLocal, 'shadowJar')
    }

    // Gradle task name -> trainer main class. One JavaExec task (group 'modeltraining')
    // is created for each entry further below.
    def trainerTasks = [
        "train_name_header"           : "org.grobid.trainer.NameHeaderTrainer",
        "train_name_citation"         : "org.grobid.trainer.NameCitationTrainer",
        "train_affiliation_address"   : "org.grobid.trainer.AffiliationAddressTrainer",
        "train_header"                : "org.grobid.trainer.HeaderTrainer",
        "train_fulltext"              : "org.grobid.trainer.FulltextTrainer",
        "train_shorttext"             : "org.grobid.trainer.ShorttextTrainer",
        "train_figure"                : "org.grobid.trainer.FigureTrainer",
        "train_table"                 : "org.grobid.trainer.TableTrainer",
        "train_citation"              : "org.grobid.trainer.CitationTrainer",
        "train_date"                  : "org.grobid.trainer.DateTrainer",
        "train_segmentation"          : "org.grobid.trainer.SegmentationTrainer",
        "train_reference_segmentation": "org.grobid.trainer.ReferenceSegmenterTrainer",
        "train_ebook_model"           : "org.grobid.trainer.EbookTrainer",
        "train_patent_citation"       : "org.grobid.trainer.PatentParserTrainer",
        "train_funding_acknowledgement" : "org.grobid.trainer.FundingAcknowledgementTrainer"
    ]

    // Native library folders of the current platform, appended to java.library.path by the
    // training tasks below and by the evaluation tasks of this project.
    def libraries = ""
    if (Os.isFamily(Os.FAMILY_MAC)) {
        // must be checked before FAMILY_UNIX
        if (Os.OS_ARCH.equals("aarch64")) {
            libraries = "${file("../grobid-home/lib/mac_arm-64").absolutePath}"
        } else {
            libraries = "${file("../grobid-home/lib/mac-64").absolutePath}"
        }
    } else if (Os.isFamily(Os.FAMILY_UNIX)) {
        libraries = "${file("../grobid-home/lib/lin-64/jep").absolutePath}:" +
            "${file("../grobid-home/lib/lin-64").absolutePath}:"
    } else  {
        throw new RuntimeException("Unsupported platform!")
    }

    trainerTasks.each { taskName, mainClassName ->
        tasks.create(name: taskName, type: JavaExec, group: 'modeltraining') {
            main = mainClassName
            classpath = sourceSets.main.runtimeClasspath
            // The heap size is set unconditionally, as the evaluation tasks do; it used to be
            // applied only inside the Java 9+ branch. --add-opens stays limited to Java 9+.
            jvmArgs '-Xmx3072m'
            if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
                jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
            }
            systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
        }
    }

    // evaluation tasks

    // Value of the project property `propName` (e.g. passed with -PpropName=...),
    // or `defaultVal` when the property is not set.
    ext.getArg = { propName, defaultVal ->
        return project.hasProperty(propName) ? project.getProperty(propName) : defaultVal;
    }

    // Creates one JavaExec task in the 'modelevaluation' group. All evaluation tasks share the
    // same classpath, heap size, --add-opens flag (Java 9+ only) and java.library.path; they
    // differ only in name, main class and program arguments.
    def createEvaluationTask = { String evalTaskName, String evalMainClass, List evalArgs ->
        tasks.create(name: evalTaskName, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') {
            main = evalMainClass
            classpath = sourceSets.main.runtimeClasspath
            args evalArgs
            jvmArgs '-Xmx3072m'
            if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
                jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
            }
            systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
        }
    }

    // run like this:
    // ./gradlew jatsEval -Pp2t=/path/to/goldenSet
    // ./gradlew jatsEval -Pp2t=/path/to/goldenSet -Prun=1 -PfileRatio=0.1
    // ./gradlew teiEval -Pp2t=/path/to/goldenSet
    // ./gradlew PrepareDOIMatching -Pp2t=ABS_PATH_TO_PMC/PMC_sample_1943
    // ./gradlew EvaluateDOIMatching -Pp2t=ABS_PATH_TO_PMC/PMC_sample_1943
    createEvaluationTask('jatsEval', 'org.grobid.trainer.evaluation.EndToEndEvaluation',
        ['nlm', getArg('p2t', '.'), getArg('run', '0'), getArg('fileRatio', '1.0')])

    createEvaluationTask('teiEval', 'org.grobid.trainer.evaluation.EndToEndEvaluation',
        ['tei', getArg('p2t', '.'), getArg('run', '0'), getArg('fileRatio', '1.0')])

    createEvaluationTask('PrepareDOIMatching', 'org.grobid.trainer.evaluation.EvaluationDOIMatching',
        ['data', getArg('p2t', '.')])

    createEvaluationTask('EvaluateDOIMatching', 'org.grobid.trainer.evaluation.EvaluationDOIMatching',
        ['eval', getArg('p2t', '.')])
}

/** JACOCO **/

// Aggregated JaCoCo report over all subprojects that apply the 'jacoco' plugin.
tasks.register("codeCoverageReport", JacocoReport) {
    // If a subproject applies the 'jacoco' plugin, add its results to the report
    subprojects { subproject ->
        subproject.plugins.withType(JacocoPlugin).configureEach {
            subproject.tasks.matching({ t -> t.extensions.findByType(JacocoTaskExtension) }).configureEach { testTask ->
                sourceSets subproject.sourceSets.main
                executionData(testTask)
            }

            // To automatically run `test` every time `./gradlew codeCoverageReport` is called,
            // you may want to set up a task dependency between them as shown below.
            // Note that this requires the `test` tasks to be resolved eagerly (see `forEach`) which
            // may have a negative effect on the configuration time of your build.
            subproject.tasks.matching({ t -> t.extensions.findByType(JacocoTaskExtension) }).forEach {
                rootProject.tasks.codeCoverageReport.dependsOn(it)
            }
        }
    }

    // XML -> coveralls,
    // HTML -> for manual check
    // (`required` replaces the report property `enabled`, which is deprecated in Gradle 7)
    reports {
        xml.required = true
        html.required = true
        csv.required = true
    }

}

/** COVERALLS **/
// Source directories reported to coveralls: the main source set directories of all subprojects.
coveralls.sourceDirs = files(subprojects.sourceSets.main.allSource.srcDirs).files.absolutePath

// The coveralls upload needs the aggregated coverage report first.
tasks.coveralls.dependsOn(codeCoverageReport)

// Gradle version used when (re)generating the wrapper.
wrapper.gradleVersion = "7.2"

// The root `build` task depends on the `build` task of every subproject.
build.dependsOn(subprojects.collect { sub -> sub.tasks.getByName("build") })
// back to top