Revision a7061474f905fe60a9581dbf4e14431d00b130a2 authored by Felix GV on 12 December 2015, 01:26:55 UTC, committed by Felix GV on 15 December 2015, 02:45:06 UTC
Summary: This new mode adds the capability of pushing to
multiple clusters with different numbers of nodes and different
partition assignments.

Compatibility: Although this new mode only works if both the BnP
job and the Voldemort servers are upgraded, the change can be rolled
out gradually without breaking anything. There is a negotiation
phase at the beginning of the BnP job which determines whether all
servers of all clusters are capable of using the new mode and
willing (i.e.: configured) to do so. If not all servers are upgraded
and enabled, then the BnP job falls back to its old behavior.
Likewise, if a server gets a fetch request from a non-upgraded BnP
job, it will work just like before. By default, servers answer the
negotiation by saying they support the new mode. The old behavior
can be forced with the following server-side configuration:

readonly.build.primary.replicas.only=false
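
For illustration, the fallback decision on the BnP side could be
sketched as follows (a hedged sketch with hypothetical helper names,
not the actual BnP or AdminClient API):

    // Assume the new mode unless any server opts out or is too old.
    boolean buildPrimaryReplicasOnly = true;
    for (Cluster cluster : allClusters) {
        for (Node node : cluster.getNodes()) {
            // serverSupportsPrimaryReplicasOnly() is hypothetical: it stands
            // in for the negotiation request asking each server whether its
            // readonly.build.primary.replicas.only config is enabled
            // (servers answer true by default).
            if (!serverSupportsPrimaryReplicasOnly(node)) {
                buildPrimaryReplicasOnly = false;
            }
        }
    }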

Running in this new mode has several implications:

1. When running in the new mode, store files are stored in the
   BnP output directory under nested partition directories, rather
   than in nested node directories.
2. The MR job uses half as many reducers and half as much shuffle
   bandwidth compared to before.
3. The metadata checksum is now computed per partition, rather than per node.
4. Instead of having one .metadata file per partition, there is now
   only a single full-store.metadata file at the root of the output
   directory.
5. The server-side HdfsFetcher code inspects the metadata file and
   determines whether it should operate in 'build.primary.replicas.only'
   mode. If so, the server determines which partitions it needs to
   fetch on its own, rather than relying on what the BnP job placed
   in a node-specific directory.
6. The replica type number contained in Read-Only V2 file names is
   now useless, but we are keeping it in there just to avoid
   unnecessary changes.
7. When initializing a Read-Only V2 store directory, the server now
   looks for files named with the incorrect replica type, and if it
   finds any, it renames them to the replica type expected by this
   server (see the sketch below).
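
To illustrate implication 7, the renaming step might look roughly like
the following sketch (hypothetical helper names; the actual HdfsFetcher
and store-initialization code differ). It assumes the Read-Only V2 file
naming convention of <partition>_<replicaType>_<chunk>.<extension>:

    // Hedged sketch, not the actual server code.
    for (File file : storeVersionDir.listFiles()) {
        String[] parts = file.getName().split("[_.]");
        if (parts.length != 4) continue; // skip metadata and other files
        int partition = Integer.parseInt(parts[0]);
        int actualReplicaType = Integer.parseInt(parts[1]);
        // getReplicaTypeForPartition() is hypothetical: it would derive, from
        // the cluster metadata, the replica type this server expects to hold.
        int expectedReplicaType = getReplicaTypeForPartition(partition);
        if (actualReplicaType != expectedReplicaType) {
            file.renameTo(new File(storeVersionDir, partition + "_"
                    + expectedReplicaType + "_" + parts[2] + "." + parts[3]));
        }
    }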

Other changes:

1. Added socket port to Node's toString functions. Also made the
   output of the Node's toString(), briefToString() and getStateString()
   functions more consistent.
2. Introduced a new Protobuf message for the GetConfig admin request.
   This new message is intended to be a generic way to retrieve any
   server config (see the sketch after this list).
3. Refactored VoldemortConfig to provide access to any config by its
   string key. Also cleaned up a lot of hard-coded strings, which are
   now constants.
4. Various minor refactorings in BnP code.
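
For illustration, fetching a server config by its string key through the
new GetConfig admin request might look like this (a hedged sketch;
getConfig() is a hypothetical method name, and the actual Protobuf
message and AdminClient wiring differ):

    // Hedged sketch of "Other changes" items 2 and 3.
    String key = "readonly.build.primary.replicas.only";
    // A generic GetConfig admin request could resolve any VoldemortConfig
    // value by its string key, now that configs are accessible by key.
    String value = adminClient.getConfig(nodeId, key);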
1 parent 69fcd3f
build.gradle
import java.util.jar.JarEntry;

apply plugin: 'java'
apply plugin: 'idea'
apply plugin: 'eclipse'
apply plugin: 'war'

buildscript {
  repositories { jcenter() }
  dependencies {
    classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.1'
  }
}

// The shadow plugin requires the 'java' (or 'groovy') plugin, which is already applied above.
apply plugin: 'com.github.johnrengelman.shadow'

def String getProjectProperty(String propertyName) {
    String propertyValue = "null"
    if (hasProperty(propertyName)) {
        propertyValue = this.properties[propertyName]
    }
    else {
        throw new GradleException("Property '" + propertyName + "' is not defined in the properties file")
    }
    return propertyValue
}
def projectName = "voldemort"

def sourceDir = getProjectProperty('src.dir')
def distDir = getProjectProperty('dist.dir')
def classesDir = getProjectProperty('classes.dir')
def javaDir = getProjectProperty('java.dir')
def privateLibDir = getProjectProperty('private.lib.dir')
def resourcesDir = getProjectProperty('resources.dir')
def javaDocDir = getProjectProperty('javadoc.dir')

def voldTestClassesDir = getProjectProperty('testclasses.dir')

def commonTestSrcDir = getProjectProperty('commontestsrc.dir')
def unitTestSrcDir = getProjectProperty('unittestsrc.dir')
def intTestSrcDir = getProjectProperty('inttestsrc.dir')
def longTestSrcDir = getProjectProperty('longtestsrc.dir')

def contribClassesDir = getProjectProperty('contrib.classes.dir')
def contribRootDir = getProjectProperty('contrib.root.dir')

def voldVersion = getProjectProperty('curr.release')
def javacVersion = getProjectProperty('javac.version')
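
// The keys above are read from the project's properties file. Illustrative
// entries (assumed values for the sake of example; the real values live in
// the repository's gradle.properties):
//   src.dir=src
//   java.dir=src/java
//   dist.dir=dist
//   classes.dir=dist/classes
//   testclasses.dir=dist/testclasses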

// This sets the project-level 'version' property. Defining 'def version' instead would shadow it and cause hard-to-debug problems.
version = voldVersion

def archiveDirectoryName = projectName + '-' + version
def archiveDirectoryPath = distDir + "/" + archiveDirectoryName

def deleteDirectoryContents(directory) {
    project.file(directory).deleteDir()
    project.file(directory).mkdirs()
}

println 'Java source/target compatibility version: ' + javacVersion
sourceCompatibility = javacVersion
targetCompatibility = javacVersion
compileJava.options.debug = true

repositories {
    mavenCentral()
    maven {
        // For Hadoop dependencies
        url "https://repository.cloudera.com/artifactory/cloudera-repos/"
    }
    flatDir { dirs privateLibDir }
    flatDir { dirs contribRootDir }
}


sourceSets {
    main {
        java { srcDirs = [javaDir]}
        resources {
            srcDirs = [javaDir]
            include '**/*.xsd'
        }
        output.classesDir = classesDir
        output.resourcesDir = resourcesDir
    }
    test {
        java {
            srcDirs = [
                commonTestSrcDir ,
                unitTestSrcDir,
                intTestSrcDir ,
                longTestSrcDir
            ]
        }
        output.classesDir = voldTestClassesDir
    }
    contrib {
        java { srcDirs = [contribRootDir]}
        compileClasspath += sourceSets.main.runtimeClasspath + sourceSets.test.runtimeClasspath
        output.classesDir = contribClassesDir
    }
}

compileJava.doLast {
    project.copy {
        from (javaDir) { exclude '**/*.java','**/*.html','**/log4j.properties' }
        into classesDir
    }

    project.copy {
        // Theoretically this block could be replaced by including log4j.properties in the main resources,
        // but that would cause log4j.properties to be bundled into the voldJar. The implications of that
        // change are unclear, so it is avoided for now.
        from (javaDir) { include 'log4j.properties' }
        into resourcesDir
    }
}

compileTestJava.doLast {
    project.copy {
        from (commonTestSrcDir) { exclude '**/*.java','**/*.html' }
        from (unitTestSrcDir) { exclude '**/*.java','**/*.html' }
        into voldTestClassesDir
    }
}

compileContribJava.doLast {
    project.copy {
        into contribClassesDir
    }
}

task testJar(type: Jar) {
    baseName = projectName + "-test"
    from sourceSets.test.output
    destinationDir = project.file(distDir)
}

task voldJar(type:Jar) {
    baseName = projectName
    manifest {
        attributes 'Voldemort-Implementation-Version' : version,
        'Implementation-Title': 'Voldemort',
        'Implementation-Version': version,
        'Implementation-Vendor' :'LinkedIn'
    }
    from sourceSets.main.output
    destinationDir = project.file(distDir)
}

task contribJar(type:Jar) {
    dependsOn voldJar, testJar, sourceSets.contrib.output
    baseName = projectName + "-contrib"
    from sourceSets.contrib.output
    destinationDir = project.file(distDir)
}

task srcJar(type: Jar, dependsOn: classes) {
    classifier = 'src'
    from sourceSets.main.java.srcDirs
    destinationDir = project.file(distDir)
}

task bnpJar(dependsOn: shadowJar) {
    // Just a nicer, more self-explanatory name than "shadowJar".
}

artifacts {
    archives voldJar
    archives testJar
    archives contribJar
    archives srcJar
}

clean {
    delete(distDir)
    delete('lib')
    doLast { deleteDirectoryContents(javaDocDir) }
}

// Dependencies used by both BnP and Voldemort
// TODO: Decide if we want to do that for all dependencies, even if they're used just in Voldemort...

def depAvro = 'org.apache.avro:avro:1.4.0'
def depProtoBuf = 'com.google.protobuf:protobuf-java:2.3.0'
def depJdom = 'org.jdom:jdom:1.1'
def depAzkaban = 'com.linkedin.azkaban:azkaban:2.5.0'
def depGuava = 'com.google.guava:guava:14.0.1'
def depLog4j = 'log4j:log4j:1.2.15'
def depJacksonMapper = 'org.codehaus.jackson:jackson-mapper-asl:1.9.13'
def depJoda = 'joda-time:joda-time:1.6'
def depTehuti = 'io.tehuti:tehuti:0.7.0'

shadowJar {
    classifier "bnp"
    from sourceSets.main.output, sourceSets.contrib.output, sourceSets.test.output, sourceSets.main.resources

    // Required when working in a Hadoop 2.x environment
    dependencies {
        include(dependency(depAvro))
        include(dependency(depProtoBuf))
        include(dependency(depJdom))
        include(dependency(depAzkaban))
        include(dependency(depGuava))
        include(dependency(depLog4j))
        include(dependency(depJacksonMapper))
        include(dependency(depJoda))
        include(dependency(depTehuti))
    }
    relocate 'com.google.protobuf', 'voldemort.shadow.2.3.0.com.google.protobuf'
    relocate 'org.apache.avro', 'voldemort.shadow.1.4.0.org.apache.avro'
    // TODO: find a way to exclude private lib's BDB-JE which gets pulled into the fat jar...
}

task copySources (type: Copy) {
    from ('.') { include 'bin/*.sh', 'bin/*.bat' , 'bin/*.py' }
    from ('.') { include  distDir + '/*.jar'}
    from ('.') { exclude distDir + '/**' ,'bin/**' , 'build/**', '.git/**' , '.gradle/**', 'config/**/data/**' }
    into archiveDirectoryPath
}

task zip (type: Zip) {
    dependsOn copySources
    baseName = projectName

    from(distDir) {
        include archiveDirectoryName + '/bin/**'
        fileMode = 0755
    }
    from(distDir) {
        include archiveDirectoryName + '/**'
        exclude archiveDirectoryName + '/bin/**'
    }

    destinationDir = project.file(distDir)
}

task tar (type: Tar) {
    dependsOn copySources
    compression = Compression.GZIP
    baseName = projectName
    extension = "tar.gz"

    from(distDir) {
        include archiveDirectoryName + '/bin/**'
        fileMode = 0755
    }
    from(distDir) {
        include archiveDirectoryName + '/**'
        exclude archiveDirectoryName + '/bin/**'
    }

    destinationDir = project.file(distDir)
}

task copyDeps(type: Copy) {
    from  {configurations.compile  }
    into "lib"
}

war {
    dependsOn copyDeps
    from sourceSets.main.output
    webXml = project.file('web.xml')
    destinationDir = project.file(distDir)
}

assemble.dependsOn copyDeps

jar.dependsOn contribJar,srcJar, copyDeps
compileContribJava.dependsOn voldJar
copySources.dependsOn jar

tasks.withType(Test) {
    // Ant restarts the JVM for each test. Without restarting, the tests run out of memory
    // even if you give the JVM 8 GB. On inspection, most of the space is consumed by the
    // int[] of the Histogram in the NioSelectorManager. This is likely explained by the
    // creation of many client factories, each of which spawns many NIO threads. Not
    // investigated further, in order to maintain compatibility with ant. Also, the
    // JMX-bean-related tests fail unless each test is forked.

    // Do not raise the max parallelism: some tests use the same port and would
    // run into bind exceptions.
    maxHeapSize = "8g"
    forkEvery = 1


    // If ignoreFailures is not set, then merged reports will not be generated:
    // Gradle aborts further tasks on test failure, so if you run junitAll
    // (which runs 3 test tasks), the reports task would never run when tests fail.
    ignoreFailures = true
    //ignoreFailures = gradle.startParameter.continueOnFailure

    useJUnit()

    testLogging {
        events "started", "passed", "skipped", "failed"
        exceptionFormat = 'full'
        // showStandardStreams = true
    }

    afterTest { test, result ->
        logger.lifecycle("testFinished: $test, result: $result.resultType")
    }

    doFirst {
        def classesSize = candidateClassFiles.files.size()
        logger.lifecycle("{} starts executing {} test classes {}",
                path, classesSize, classesSize > 0? "(" + candidateClassFiles.files*.name[0] + ", ...)" : "")
    }

    //all standard error messages from tests will get routed to 'DEBUG' level messages.
    //logging.captureStandardError(LogLevel.DEBUG)
    //all standard output messages from tests will get routed to 'DEBUG' level messages.
    //logging.captureStandardOutput(LogLevel.DEBUG)

    //Set reasonable defaults for reports location
    reports.html.destination = file("$project.buildDir/reports/$name")
    reports.junitXml.destination = file("$project.buildDir/$name-results")

    //Set reasonable defaults classpath and classes dir. They can be reconfigured in an individual task.
    it.testClassesDir = sourceSets.test.output.classesDir
    classpath = sourceSets.test.runtimeClasspath

    // Makes sure tests aren't marked "UP-TO-DATE" after running
    outputs.upToDateWhen { false }
}

task resetConfig() {
    doLast {
        def dirsToDelete = [".temp", ".version", "data"]
        def deleteRecursively

        deleteRecursively = { file ->
            file.eachFile() {f ->
                if(f.directory) {
                    if( dirsToDelete.contains(f.getName()) )
                    {
                        println "deleting ${f.getAbsolutePath()}"
                        delete f
                    }
                    else
                    {
                        deleteRecursively(f)
                    }
                }
            }
        }

        deleteRecursively (new File("config"))
    }
}

task junit(dependsOn: test)

Collection<String> testClassesFrom(String dir, String include = '**/*Test.*') {
    //take all *Test.java files found in given dir, make the path relative and replace .java with .class
    fileTree(dir: dir, includes: [include]).collect {
        it.absolutePath.replace("\\", "/").
                replaceAll(file(dir).absolutePath.replace("\\", "/") + "/", "").
                replaceAll(".java\$", ".class")
    }
}

test {
    description = "Runs acceptance tests"
    include testClassesFrom(unitTestSrcDir)
}

task junitLong(type: Test) {
    description = "Runs long junit tests"
    include testClassesFrom(longTestSrcDir)
}

task junitInt(type: Test) {
    description = "Runs integration tests"
    include testClassesFrom(intTestSrcDir)
}

task junitRebalance(type: Test) {
    include testClassesFrom(unitTestSrcDir, '**/*Rebalance*Test.java')
}

task junitRebalanceLong(type: Test) {
    include testClassesFrom(longTestSrcDir, '**/*Rebalance*Test.java')
}

task contribJunit(type: Test) {
    description = "Run contrib junit tests except EC2 and Krati tests."
    it.testClassesDir = file(contribClassesDir)

    exclude '**/*PerformanceTest.class'
    exclude '**/*RemoteTest.class'
    exclude '**/Ec2*Test.class'
    exclude '**/Krati*Test.class'

    classpath += sourceSets.contrib.runtimeClasspath + sourceSets.contrib.output
}

task junitAll(type: TestReport) {
    reportOn test, junitLong, contribJunit
    destinationDir = file("$project.buildDir/reports/$name")
}

task aggregatedJunit(type: TestReport) {
    destinationDir = file("$project.buildDir/reports/$name")
}

tasks.withType(Test) {
    finalizedBy aggregatedJunit
    doLast { aggregatedJunit.reportOn it.binResultsDir }
}

task wrapper(type: Wrapper) { gradleVersion = '2.0' }


dependencies {
    // Avro serialization format
    compile depAvro

    // INTERNAL_LIBS azkaban version not found
    // azkaban-common-0.05.jar

    // INTERNAL_LIBS Used for tomcat deployment, not sure if anyone uses it
    // catalina-ant.jar , version not found in maven central

    // coders decoders containing the Base64,binary encoding
    compile 'commons-codec:commons-codec:1.4'

    // TRANSITIVE_DEPENDENCY The contrib jar depends on commons-configuration-1.6.jar;
    // commons-configuration in turn depends on commons-collections
    //compile 'commons-collections:commons-collections:3.2.1'

    // Used by MySql storage engine classes
    // The jar supports database connection pooling
    compile 'commons-dbcp:commons-dbcp:1.2.2'

    // Commons IO is used in many places:
    // IOUtils, FileUtils and ByteArrayOutputStream
    compile 'commons-io:commons-io:2.1'

    // LZF compression strategy for store and tests.
    compile 'com.ning:compress-lzf:0.9.1'

    // Used all over the place for collections
    compile depGuava

    // Used for the read-only store HDFS fetcher.
    compile 'org.apache.hadoop:hadoop-auth:2.3.0-cdh5.1.5'

    // Used in lots of places. There seems to be some overlap between httpclient and httpcore, but it is not clear.
    compile 'org.apache.httpcomponents:httpclient:4.1.2'

    // Contains both HTTP server and client functionality. Used for HttpResponse, but could be used in more places.
    compile 'org.apache.httpcomponents:httpcore:4.1.2'

    // JSON mapping library from Java Objects to JSON
    compile depJacksonMapper

    // JSON processing library
    compile 'org.codehaus.jackson:jackson-core-asl:1.9.13'

    // Used for reading XML files and Document.
    compile depJdom

    // Jetty is used for HttpService and tests. Jetty Util is used for QueuedThreadPool class.
    compile 'org.mortbay.jetty:jetty-util:6.1.18'
    compile 'org.mortbay.jetty:jetty:6.1.18'

    // A line-processing library for the command line; no compile-time dependency.
    // Used by the Voldemort shell
    compile 'jline:jline:0.9.94'

    // JNA is a library for invoking native functions;
    // used in the read-only store
    compile 'net.java.dev.jna:jna:3.2.7'

    // Joda-Time is a replacement for Java's Date and Time classes;
    // used in read-only store code.
    compile depJoda

    // Used for argument command line parsing
    compile 'net.sf.jopt-simple:jopt-simple:4.6'

    // log4j - logger used in almost all files
    compile depLog4j

    // Used in the read-only store and the coordinator
    compile 'javax.mail:mail:1.4.1'

    // Used in co-ordinator and rest services
    compile 'io.netty:netty:3.5.8.Final'

    // TRANSITIVE_DEPENDENCY Paranamer is a library that allows the parameter names of non-private methods and constructors to be accessed at runtime
    // Avro has a dependency on paranamer
    // compile 'com.thoughtworks.paranamer:paranamer:2.1'

    // protobuf is a supported protocol format between voldemort client and server
    compile depProtoBuf

    // Servlet
    compile 'javax.servlet:servlet-api:2.5'

    // slf4j is another logging abstraction framework.
    // It is used by the apache.avro, apache.hadoop and r2 clients
    compile 'org.slf4j:slf4j-api:1.5.6'
    compile 'org.slf4j:slf4j-log4j12:1.5.6'

    // snappy is one of the supported compression strategies in voldemort
    compile 'org.iq80.snappy:snappy:0.2'

    // Velocity is a simple yet powerful Java-based template engine that renders data
    // from plain Java objects to text, XML, email, SQL, PostScript, HTML, etc.
    // Velocity is used for the HTTP server GUI
    compile 'org.apache.velocity:velocity:1.6.2'

    // TRANSITIVE_DEPENDENCY Apache XML Parser
    // used by jdom
    // compile 'xerces:xercesImpl:2.9.1'

    compile fileTree(dir: privateLibDir, includes: ['**/*.jar'])

    // CERN library containing high-performance maps for int and double.
    // Currently only used in the tests
    testCompile 'colt:colt:1.2.0'

    // Used in resource pool perf testing class
    testCompile 'commons-pool:commons-pool:1.5.2'

    testRuntime 'mysql:mysql-connector-java:5.1.31'

    // Used for unit tests and other automated testing
    testCompile 'junit:junit:4.6'

    // Mockito is written by our beloved friend Szczepan Faber :)
    // Mocking framework used in some tests
    testCompile 'org.mockito:mockito-all:1.8.5'

    contribCompile sourceSets.main.output
    contribCompile sourceSets.test.output

    // Declaring contribCompile dependencies as compile dependencies because,
    // otherwise, while copying dependencies to the lib directory, conflict
    // resolution is not done properly across sourceSets and we end up with
    // two versions of a few jars (log4j, servlet, etc.)
    compile 'commons-configuration:commons-configuration:1.6'
    compile('org.apache.hadoop:hadoop-core:2.3.0-mr1-cdh5.1.5') {
        exclude group: 'com.google.protobuf'
        exclude group: 'org.apache.avro'
    }
    compile('org.apache.hadoop:hadoop-common:2.3.0-cdh5.1.5') {
        exclude group: 'com.google.protobuf'
        exclude group: 'org.apache.avro'
    }
    compile('org.apache.hadoop:hadoop-hdfs:2.3.0-cdh5.1.5') {
        exclude group: 'com.google.protobuf'
        exclude group: 'org.apache.avro'
    }

    compile 'com.linkedin.pegasus:r2:1.8.3'
    compile 'com.linkedin.pegasus:data:1.8.3'
    compile 'com.linkedin.pegasus:pegasus-common:1.8.3'
    compile depAzkaban

    compile 'com.google.code.typica:typica:1.7.2'
    compile 'com.sna-projects.krati:krati:0.4.9'

    // Metrics
    compile depTehuti
    testCompile 'io.tehuti:tehuti:0.7.0:test'

    // Other libs...
    compile 'org.apache.tomcat:catalina-ant:6.0.43'
    compile 'org.apache.hadoop:libthrift:0.5.0.0'
    
    // rocksdb from maven
    compile 'org.rocksdb:rocksdbjni:3.13.1'

    // Bouncy Castle library
    compile 'org.bouncycastle:bcprov-jdk15on:1.48'
}

eclipse {
    project {
        buildCommand  'org.eclipse.jdt.core.javabuilder'
    }
    jdt {
        // Currently the javac version of the jar is set to 1.5,
        // but @Override works differently between 1.5 and 1.6, so it is overridden here
        sourceCompatibility = targetCompatibility = 1.6
    }
    classpath {
        defaultOutputDir = project.file('classes')
        downloadSources=true
        file {
            // SourceSets create multiple src/java entries in .classpath with varying includes,
            // but Eclipse 4.3 complains about duplicate classpaths.
            // If the contrib root is included instead of the separate projects, Eclipse
            // expects the package to be different (contrib.restclient.test.voldemort.client
            // versus voldemort.client). So all src entries are removed, and the previous
            // src entries, which used to work, are added back.
            whenMerged { classpath ->
                classpath.entries.removeAll { entry ->
                    (entry.kind == 'src' )
                }
            }
            withXml {
                def node = it.asNode()
                [
                    "src/java",
                    "contrib/hadoop-store-builder/test",
                    "contrib/hadoop-store-builder/src/java",
                    "test/unit",
                    "test/integration",
                    "test/common",
                    "test/long",
                    "example/java",
                    "contrib/krati/src/java",
                    "contrib/krati/test",
                    "contrib/collections/src/java",
                    "contrib/collections/test",
                    "contrib/restclient/src/java",
                    "contrib/restclient/test"
                ]
                .each{
                    node.appendNode('classpathentry', [kind: 'src', path: "$it"])
                }
            }
        }
    }
}

idea {
  module {
    downloadJavadoc = true
    downloadSources = true
  }
}
