https://github.com/kermitt2/grobid-ner
Raw File
Tip revision: cd02a2ab0696d8f0fded5c489f5a2dc1ff7d42c2 authored by lopez on 26 November 2023, 20:02:27 UTC
Merge branch 'master' of github.com:kermitt2/grobid-ner
Tip revision: cd02a2a
build.gradle
// Classpath of the build script itself: where build-time plugins are looked up and
// which plugin artifacts are made available to this script.
buildscript {
    repositories {
        // Repositories are consulted in declaration order.
        mavenLocal()
        mavenCentral()
        maven {
            url 'https://plugins.gradle.org/m2/'
        }
    }
    dependencies {
        // NOTE(review): Shadow 7.0.0 is also requested through the plugins {} block of this
        // file, so this classpath entry may be redundant — confirm before removing.
        classpath "gradle.plugin.com.github.jengelman.gradle.plugins:shadow:7.0.0"
    }
}

// Plugins applied through the plugins DSL.
plugins {
    // Shadow: this file imports its ShadowJar task type and configures its shadowJar task.
    id 'com.github.johnrengelman.shadow' version '7.0.0'
    // NOTE(review): no use of the download plugin is visible in this file — confirm it is still needed.
    id "de.undercouch.download" version "4.1.1"
}

// Repositories used to resolve the project's own dependencies, consulted in declaration
// order: the local Maven repository first, then Maven Central.
repositories {
    mavenLocal()
    mavenCentral()
}

// Core plugins applied the legacy way. In this file, 'java-library' backs the
// implementation/testImplementation configurations and sourceSets.main, and
// 'maven-publish' backs the publishing {} block and publishToMavenLocal.
apply plugin: 'java-library'
apply plugin: 'base'
apply plugin: 'maven-publish'

// Gradle version used when the wrapper task (re)generates the wrapper files.
wrapper {
    gradleVersion = '7.2'
}

import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
import org.apache.tools.ant.taskdefs.condition.Os

ext {
    // Jars listed here are treated separately: they are flattened into grobid-core.jar on install,
    // to avoid missing dependencies in projects that include grobid-core (see the 'jar' task in grobid-core).
    // The list is empty here; the shadowJar task in this file checks file names against it.
    localLibs = []
}

// Compile/runtime and test dependencies of grobid-ner.
// Every artifact is declared exactly once, so the version in use is the one written here.
dependencies {
    implementation(group: 'xml-apis', name: 'xml-apis') {
        // otherwise xml-apis 2.0.1 will come from XOM and will result in
        // java.lang.ClassNotFoundException: org.w3c.dom.ElementTraversal
        //TODO: sort out this problem better
        version {
            strictly '1.4.01'
        }
    }

    // GROBID and NLP libraries
    implementation 'org.grobid:grobid-core:0.8.0'
    implementation 'org.grobid:grobid-trainer:0.8.0'
    implementation 'black.ninia:jep:4.0.2'
    implementation 'org.apache.opennlp:opennlp-tools:1.9.1'
    implementation "joda-time:joda-time:2.9.9"
    implementation "org.apache.lucene:lucene-analyzers-common:4.5.1"
    implementation group: 'org.jruby', name: 'jruby-complete', version: '9.2.13.0'

    // Apache Commons
    implementation 'commons-pool:commons-pool:1.6'
    // 2.6 is the version Gradle selects when both 2.5 and 2.6 are requested (highest wins).
    implementation 'commons-io:commons-io:2.6'
    implementation 'org.apache.commons:commons-lang3:3.6'
    implementation 'org.apache.commons:commons-collections4:4.1'
    implementation 'commons-logging:commons-logging:1.2'

    // Logging API, XML streaming, ClearNLP
    implementation 'org.slf4j:slf4j-api:1.7.25'
    implementation 'org.codehaus.woodstox:stax2-api:3.1.4'
    implementation 'org.codehaus.woodstox:woodstox-core-asl:4.4.0'
    implementation 'com.googlecode.clearnlp:clearnlp:1.3.1'

    // Tests
    testImplementation 'junit:junit:4.12'
    testImplementation 'org.easymock:easymock:4.0.2'
    testImplementation 'org.hamcrest:hamcrest-all:1.3'

    // Jackson (JSON / YAML)
    implementation "com.fasterxml.jackson.core:jackson-core:2.10.1"
    implementation "com.fasterxml.jackson.core:jackson-databind:2.10.1"
    implementation "com.fasterxml.jackson.module:jackson-module-afterburner:2.10.1"
    implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.10.1"
}

// Maven coordinates and description of the artifact produced by this build.
group = 'org.grobid'
version = '0.8.0'
description = 'grobid-ner'

// Java 11 source level and bytecode target, stated with the JavaVersion constant
// rather than a decimal literal so the intended release is unambiguous.
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11

// Every Java compilation task reads its sources as UTF-8.
tasks.withType(JavaCompile) { compileTask ->
    compileTask.options.encoding = 'UTF-8'
}

// Exclude the slf4j-jdk14 and slf4j-log4j12 bindings and log4j itself from
// everything declared through the `implementation` configuration.
configurations {
    implementation {
        exclude group: "org.slf4j", module: "slf4j-jdk14"
        exclude group: 'org.slf4j', module: "slf4j-log4j12"
        exclude group: 'log4j', module: "log4j"
    }
}

/** Model management **/
// Copies the content of resources/models into ../grobid-home/models
// (both paths are relative to this project's directory).
task copyModels(type: Copy) {
    from("resources/models")
    into("../grobid-home/models")
}

/** Training configuration **/
/*def trainerTasks = [
        //Training models
//        "train_temporalExpression": "org.grobid.trainer.TemporalExpressionTrainer",
        "train_ner"               : "org.grobid.trainer.NEREnglishTrainer",
        "train_nerfr"             : "org.grobid.trainer.NERFrenchTrainer",
        "train_nersense"          : "org.grobid.trainer.SenseTrainer",

        // CONLL
        "eval_conll"              : "org.grobid.trainer.NEREvaluation",
        "train_conll"             : "org.grobid.trainer.CoNNLNERTrainer"
]*/

// Native library location(s) under ../grobid-home/lib, chosen per platform and later
// appended to java.library.path by the training tasks.
// Absolute path of a sub-directory of ../grobid-home/lib.
def grobidLibPath = { String subDir -> file("../grobid-home/lib/" + subDir).absolutePath }

def libraries
if (Os.isFamily(Os.FAMILY_MAC)) {
    // Apple Silicon and Intel Macs use different directories.
    libraries = grobidLibPath(Os.OS_ARCH.equals("aarch64") ? "mac_arm-64" : "mac-64")
} else if (Os.isFamily(Os.FAMILY_UNIX)) {
    // Two colon-terminated entries: the jep sub-directory first, then lin-64 itself.
    libraries = grobidLibPath("lin-64/jep") + ":" + grobidLibPath("lin-64") + ":"
} else {
    // Any other OS family aborts the build configuration.
    throw new RuntimeException("Unsupported platform!")
}

/*trainerTasks.each { taskName, mainClassName ->
    tasks.create(name: taskName, type: JavaExec, group: 'training') {
        main = mainClassName
        classpath = sourceSets.main.runtimeClasspath
        if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0)
            jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED"
        else 
            jvmArgs '-Xmx3072m'
        systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
    }
}*/
 
// Looks up a project property by name (e.g. one passed on the command line);
// yields defaultVal when the project has no such property.
ext.getArg = { propName, defaultVal ->
    if (project.hasProperty(propName)) {
        return project.getProperty(propName)
    }
    return defaultVal
}

// Train NER
// run like: ./gradlew trainNer
// The grobid-home location defaults to ../grobid-home and can be overridden with the
// project property gH.
task trainNer(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 0, model 'ner', then the grobid-home option.
    args 0, 'ner', '-gH', getArg('gH', '../grobid-home')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// Train and eval NER (based on split ratio)
// run like: ./gradlew trainEvalNer
// Project property gH overrides the grobid-home location (default ../grobid-home);
// project property s overrides the split ratio (default 0.8).
task trainEvalNer(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 2, model 'ner', the grobid-home option, then the split ratio.
    args 2, 'ner', '-gH', getArg('gH', '../grobid-home'), '-s', getArg('s', '0.8')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// Evaluation NER
// run like: ./gradlew evalNer
// The grobid-home location defaults to ../grobid-home and can be overridden with the
// project property gH.
task evalNer(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 1, model 'ner', then the grobid-home option.
    args 1, 'ner', '-gH', getArg('gH', '../grobid-home')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// Train NER FR
// run like: ./gradlew trainNerFr
// The grobid-home location defaults to ../grobid-home and can be overridden with the
// project property gH.
task trainNerFr(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 0, model 'nerfr', then the grobid-home option.
    args 0, 'nerfr', '-gH', getArg('gH', '../grobid-home')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// Train and eval NER FR (based on split ratio)
// run like: ./gradlew trainEvalNerFr
// Project property gH overrides the grobid-home location (default ../grobid-home);
// project property s overrides the split ratio (default 0.8).
task trainEvalNerFr(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 2, model 'nerfr', the grobid-home option, then the split ratio.
    args 2, 'nerfr', '-gH', getArg('gH', '../grobid-home'), '-s', getArg('s', '0.8')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// Evaluation NER FR
// run like: ./gradlew evalNerFr
// The grobid-home location defaults to ../grobid-home and can be overridden with the
// project property gH.
task evalNerFr(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 1, model 'nerfr', then the grobid-home option.
    args 1, 'nerfr', '-gH', getArg('gH', '../grobid-home')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// Train sense model
// run like: ./gradlew trainNerSense
// The grobid-home location defaults to ../grobid-home and can be overridden with the
// project property gH.
task trainNerSense(dependsOn: 'classes', type: JavaExec, group: 'training') {
    // mainClass is the supported replacement for the deprecated JavaExec `main` property.
    mainClass = 'org.grobid.trainer.NERTrainerRunner'
    classpath = sourceSets.main.runtimeClasspath
    // Runner arguments: mode 0, model 'nersense', then the grobid-home option.
    args 0, 'nersense', '-gH', getArg('gH', '../grobid-home')
    jvmArgs '-Xmx3072m'
    if (JavaVersion.current().isJava9Compatible()) {
        // Only passed on Java 9 and later.
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }
    // Append the platform-specific native library location(s) to the inherited path.
    systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}

// `install` has no action of its own; it only depends on publishToMavenLocal.
task install {
    dependsOn publishToMavenLocal
}

// Maven publication setup.
publishing {
    publications {
        // Single publication named 'mavenJava', built from the `java` software component.
        mavenJava(MavenPublication) {
            from components.java
        }
    }
    repositories {
        // The local Maven repository is the only publication target declared here.
        mavenLocal()
    }
}

// ShadowJar-typed task that packages the main source set output, with zip64 enabled.
task mainJar(type: ShadowJar) {
    zip64 true
    from sourceSets.main.output

    // NOTE(review): the body of the collect closure below is commented out, so the closure
    // produces no value for any runtime classpath entry and this `from` appears to add
    // nothing — confirm whether the block can be dropped or the filter should be restored.
    from {
        project.configurations.runtimeClasspath.collect {
//            it.isDirectory() ? [] : localLibs.contains(it.getName()) ? zipTree(it) : []
        }
    }
}

// Configuration of the shadowJar task: classifier 'onejar', merged service files, zip64,
// and NERMain as the manifest Main-Class.
shadowJar {
    archiveClassifier = 'onejar'
    mergeServiceFiles()
    zip64 true
    manifest {
        attributes 'Main-Class': 'org.grobid.core.main.batch.NERMain'
    }
    //from sourceSets.main.output

    // Unpack (zipTree) only those runtime classpath files whose name is listed in localLibs;
    // directories and all other files contribute nothing here. localLibs is declared empty
    // in the ext block of this file, so as written nothing matches.
    from {
        project.configurations.runtimeClasspath.collect {
            it.isDirectory() ? [] : localLibs.contains(it.getName()) ? zipTree(it) : []
        }
    }

    // Dependencies to bundle are taken from the runtime classpath configuration.
    configurations = [project.configurations.runtimeClasspath]
}

// The plain jar stays enabled and is only built after mainJar.
jar {
    dependsOn(mainJar)
    enabled = true
}

// Register the outputs of the three jar tasks on the `archives` configuration.
artifacts {
    archives(jar)
    archives(shadowJar)
    archives(mainJar)
}

/*tasks.withType(JavaCompile) {
    options.compilerArgs << "-Xlint:deprecation"
    options.compilerArgs << "-Xlint:unchecked"
}*/
back to top