https://github.com/kermitt2/grobid-ner
Tip revision: cd02a2ab0696d8f0fded5c489f5a2dc1ff7d42c2 authored by lopez on 26 November 2023, 20:02:27 UTC
Merge branch 'master' of github.com:kermitt2/grobid-ner
Merge branch 'master' of github.com:kermitt2/grobid-ner
Tip revision: cd02a2a
build.gradle
// Classpath of the build script itself: where build-time plugins are looked up and which
// plugin artifacts are made available to this script.
buildscript {
    repositories {
        mavenLocal()
        mavenCentral()
        maven { url = 'https://plugins.gradle.org/m2/' }
    }
    dependencies {
        // Shadow plugin artifact; it provides the ShadowJar task type imported further down.
        classpath('gradle.plugin.com.github.jengelman.gradle.plugins:shadow:7.0.0')
    }
}
// Plugins resolved through the plugins DSL.
plugins {
    // Shadow plugin: contributes the shadowJar task configured below.
    id 'com.github.johnrengelman.shadow' version '7.0.0'
    // NOTE(review): no use of the download plugin is visible in this script -- confirm it is still needed.
    id 'de.undercouch.download' version '4.1.1'
}
// Repositories used to resolve the project's own dependencies.
repositories {
    mavenLocal()
    mavenCentral()
}

// Plugins applied with the legacy syntax.
apply(plugin: 'java-library')
apply(plugin: 'base')
apply(plugin: 'maven-publish')

// Gradle version written out when the wrapper task is run.
wrapper {
    gradleVersion = '7.2'
}
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
import org.apache.tools.ant.taskdefs.condition.Os
// Names of jars that are treated separately: the shadowJar task below unpacks any runtime
// classpath entry whose file name is listed here.
// Original note: these jars will be flattened into grobid-core.jar on installing, to avoid missing
// dependencies from the projects that include grobid-core (see the 'jar' task in grobid-core).
ext.localLibs = []
// Project dependencies. Every artifact is declared exactly once.
dependencies {
    implementation(group: 'xml-apis', name: 'xml-apis') {
        // otherwise xml-apis 2.0.1 will come from XOM and will result in
        // java.lang.ClassNotFoundException: org.w3c.dom.ElementTraversal
        //TODO: sort out this problem better
        version {
            strictly '1.4.01'
        }
    }

    // GROBID
    implementation 'org.grobid:grobid-core:0.8.0'
    implementation 'org.grobid:grobid-trainer:0.8.0'

    // NLP and runtime libraries
    implementation 'black.ninia:jep:4.0.2'
    implementation 'org.apache.opennlp:opennlp-tools:1.9.1'
    implementation 'joda-time:joda-time:2.9.9'
    implementation 'org.apache.lucene:lucene-analyzers-common:4.5.1'
    implementation group: 'org.jruby', name: 'jruby-complete', version: '9.2.13.0'
    implementation 'org.slf4j:slf4j-api:1.7.25'
    implementation 'org.codehaus.woodstox:stax2-api:3.1.4'
    implementation 'org.codehaus.woodstox:woodstox-core-asl:4.4.0'
    implementation 'com.googlecode.clearnlp:clearnlp:1.3.1'

    // Apache commons
    // commons-io used to be declared twice (2.5 and 2.6); only 2.6 is kept, which is the version
    // Gradle's default "highest version wins" conflict resolution selects between the two.
    implementation 'org.apache.commons:commons-collections4:4.1'
    implementation 'org.apache.commons:commons-lang3:3.6'
    implementation 'commons-logging:commons-logging:1.2'
    implementation 'commons-io:commons-io:2.6'
    implementation 'commons-pool:commons-pool:1.6'

    // Jackson
    implementation 'com.fasterxml.jackson.core:jackson-core:2.10.1'
    implementation 'com.fasterxml.jackson.core:jackson-databind:2.10.1'
    implementation 'com.fasterxml.jackson.module:jackson-module-afterburner:2.10.1'
    implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.10.1'

    // Tests
    testImplementation 'junit:junit:4.12'
    testImplementation 'org.easymock:easymock:4.0.2'
    testImplementation 'org.hamcrest:hamcrest-all:1.3'
}
// Coordinates and description of the published artifact.
group = 'org.grobid'
version = '0.8.0'
description = 'grobid-ner'

// Java 11 source and bytecode level. The JavaVersion constant replaces the former decimal
// literal 1.11, which only denoted Java 11 through Gradle's legacy "1.x" version-string parsing.
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
// Compile every Java source set as UTF-8.
tasks.withType(JavaCompile) {
    options.setEncoding('UTF-8')
}
// Logging modules excluded from the implementation configuration.
configurations {
    implementation {
        exclude group: 'org.slf4j', module: 'slf4j-jdk14'
        exclude group: 'org.slf4j', module: 'slf4j-log4j12'
        exclude group: 'log4j', module: 'log4j'
    }
}
/** Model management **/

// Copies the contents of resources/models into the models directory of the sibling grobid-home.
tasks.create(name: 'copyModels', type: Copy) {
    from('resources/models')
    into('../grobid-home/models')
}
/** Training configuration **/
/*def trainerTasks = [
//Training models
// "train_temporalExpression": "org.grobid.trainer.TemporalExpressionTrainer",
"train_ner" : "org.grobid.trainer.NEREnglishTrainer",
"train_nerfr" : "org.grobid.trainer.NERFrenchTrainer",
"train_nersense" : "org.grobid.trainer.SenseTrainer",
// CONLL
"eval_conll" : "org.grobid.trainer.NEREvaluation",
"train_conll" : "org.grobid.trainer.CoNNLNERTrainer"
]*/
// Platform-specific native library directories under ../grobid-home/lib; the training tasks
// append this value to java.library.path.
// The macOS check must come first: only when it fails is the generic Unix family tested.
def libraries
if (Os.isFamily(Os.FAMILY_MAC)) {
    def macLibDir = Os.OS_ARCH == "aarch64" ? "../grobid-home/lib/mac_arm-64" : "../grobid-home/lib/mac-64"
    libraries = file(macLibDir).absolutePath
} else if (Os.isFamily(Os.FAMILY_UNIX)) {
    // Each Linux directory is followed by ':' (including the last one).
    libraries = [file("../grobid-home/lib/lin-64/jep"), file("../grobid-home/lib/lin-64")]
            .collect { it.absolutePath + ":" }
            .join()
} else {
    throw new RuntimeException("Unsupported platform!")
}
/*trainerTasks.each { taskName, mainClassName ->
tasks.create(name: taskName, type: JavaExec, group: 'training') {
main = mainClassName
classpath = sourceSets.main.runtimeClasspath
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0)
jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED"
else
jvmArgs '-Xmx3072m'
systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}
}*/
// Looks up a project property (e.g. one passed with -P on the command line) and falls back
// to defaultVal when the project has no property of that name.
ext.getArg = { propName, defaultVal ->
    if (!project.hasProperty(propName)) {
        return defaultVal
    }
    project.getProperty(propName)
}
/** Training and evaluation tasks **/

// Every task below launches org.grobid.trainer.NERTrainerRunner and differs only in the mode and
// the model name passed to it, so the tasks are generated from this table rather than being
// declared one by one.
// Mode as used here: 0 = train, 1 = evaluate, 2 = train and evaluate (based on a split ratio).
// Run like: ./gradlew trainNer   (optionally with -PgH=<grobid-home>, and -Ps=<ratio> for mode 2)
def nerRunnerTasks = [
    trainNer      : [0, 'ner'],      // Train NER
    trainEvalNer  : [2, 'ner'],      // Train and eval NER (based on split ratio)
    evalNer       : [1, 'ner'],      // Evaluation NER
    trainNerFr    : [0, 'nerfr'],    // Train NER FR
    trainEvalNerFr: [2, 'nerfr'],    // Train and eval NER FR (based on split ratio)
    evalNerFr     : [1, 'nerfr'],    // Evaluation NER FR
    trainNerSense : [0, 'nersense']  // Train sense model
]

nerRunnerTasks.each { taskName, modeAndModel ->
    def (runnerMode, modelName) = modeAndModel
    tasks.create(name: taskName, type: JavaExec, dependsOn: 'classes', group: 'training') {
        // mainClass replaces the deprecated JavaExec 'main' property.
        mainClass = 'org.grobid.trainer.NERTrainerRunner'
        classpath = sourceSets.main.runtimeClasspath

        args runnerMode, modelName, '-gH', getArg('gH', '../grobid-home')
        if (runnerMode == 2) {
            // Only the train-and-eval mode takes a split ratio (default 0.8).
            args '-s', getArg('s', '0.8')
        }

        jvmArgs '-Xmx3072m'
        if (JavaVersion.current().isJava9Compatible()) {
            // Only passed on Java 9 and later.
            jvmArgs '--add-opens', 'java.base/java.lang=ALL-UNNAMED'
        }

        // Append the platform-specific native library directories to the inherited library path.
        systemProperty 'java.library.path', "${System.getProperty('java.library.path')}:" + libraries
    }
}
// 'install' is an alias that runs publishToMavenLocal.
tasks.create('install') {
    dependsOn(publishToMavenLocal)
}

// Publishes the Java component as the 'mavenJava' publication, with the local Maven repository
// declared as a publishing repository.
publishing {
    publications.create('mavenJava', MavenPublication) {
        from(components.java)
    }
    repositories {
        mavenLocal()
    }
}
// Jar built with the ShadowJar task type from the project's own main output, with zip64 enabled.
task mainJar(type: ShadowJar) {
zip64 true
from sourceSets.main.output
from {
// NOTE(review): the only statement in the closure below is commented out, so the closure yields
// null for every runtime classpath entry; presumably no dependency content is added here -- confirm
// whether this second 'from' is still needed.
project.configurations.runtimeClasspath.collect {
// it.isDirectory() ? [] : localLibs.contains(it.getName()) ? zipTree(it) : []
}
}
}
// Jar with the 'onejar' classifier, built from the runtime classpath, with
// org.grobid.core.main.batch.NERMain as its Main-Class.
shadowJar {
    archiveClassifier = 'onejar'
    zip64 true
    mergeServiceFiles()
    configurations = [project.configurations.runtimeClasspath]

    manifest {
        attributes('Main-Class': 'org.grobid.core.main.batch.NERMain')
    }

    //from sourceSets.main.output
    from {
        // Unpack only regular files whose name is listed in localLibs; everything else contributes nothing here.
        project.configurations.runtimeClasspath.collect { entry ->
            (!entry.isDirectory() && localLibs.contains(entry.getName())) ? zipTree(entry) : []
        }
    }
}
// The regular jar stays enabled and is built after mainJar.
jar {
    enabled = true
    dependsOn(mainJar)
}
// Register all three jars as outputs of the 'archives' configuration.
artifacts {
    archives(jar)
    archives(shadowJar)
    archives(mainJar)
}
/*tasks.withType(JavaCompile) {
options.compilerArgs << "-Xlint:deprecation"
options.compilerArgs << "-Xlint:unchecked"
}*/