Revision f4950f549113fd2a0cf6ac4c090fe98297f2ced9 authored by Eric Prud'hommeaux on 19 December 2018, 17:03:36 UTC, committed by Eric Prud'hommeaux on 19 December 2018, 17:05:31 UTC
1 parent 575ae8d
Raw File
validate
#!/usr/bin/env node

/*
validate -n <node> -d <data> -s <shape> -x <schema>
Validate node in data as shape in schema.

example invocations:
 from the web:
  validate -n http://a.example/Issue1 -d issues.ttl -s http://b.example/IssueShape -x http://tracker.example/schemas/Issue.shex

 from a shex.js checkout with a neighboring shexTest checkout:
  ./bin/validate -x ../shexTest/schemas/1dotCode3.shex -s http://a.example/S1 -d ../shexTest/validation/IIssue1_Ip1_Io1.ttl -n http://a.example/Issue1


Extensions are loaded from ../extensions/*.js .
Running the test extension <http://shex.io/extensions/Test/> adorns the results with:
  "semActResults": {
    "extension": "http://shex.io/extensions/Test/",
    "effects": [
      "http://a.example/Issue1",
      "http://a.example/p1",
      "http://a.example/o1"
    ]
  }

The regular expression engine is loaded from either:
  absolute path, e.g. if you git cloned shex.js into /tmp:
    --regex-module /tmp/shex.js/lib/regex/nfax-val-1err
    --regex-module /tmp/shex.js/lib/regex/threaded-val-nerr
  or relative path to bin/validate directory:
    --regex-module ../lib/regex/nfax-val-1err
    --regex-module ../lib/regex/threaded-val-nerr
  or a module name in lib/regex:
    --regex-module nfax-val-1err
    --regex-module threaded-val-nerr

*/

var ShExUtil = require("../lib/ShExUtil");
var ShExLoader = require("../lib/ShExLoader");
var ShExWriter = require("../lib/ShExWriter"); // for verbose output
var ShExValidator = require("../lib/ShExValidator");
var ShEx = require("../shex");
var Path = require("path");
var Util = require("util");
// Base validator options. --or and --partition add entries to this object, and
// each manifest test starts from a copy of it.
var ValidatorOptions = { diagnose: true };
// Base schema-loading options. --duplicate-shape overrides duplicateShape.
var SchemaOptions = { duplicateShape: "abort" };
// var RunMode = { NORMAL: 0, ERROR: 1, DRYRUN: 2, USAGE: 4, HELP: 6 };
// Directory holding the bundled regular expression engines (see --regex-module
// in the header comment). Presumably consumed by getRegexModule, which is not
// visible in this part of the file -- TODO confirm.
const REGEX_MODULES = "../lib/regex/";
const MISC_NODES = true; // validate nodes not found in data.
const RDF_TYPE= "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
// "|"-joined lists of the permitted values; used as type labels in the usage text.
var orChoices = Object.keys(ShExValidator.options.or).join("|");
var parChoices = Object.keys(ShExValidator.options.partition).join("|");
var dupChoices =  ["abort", "replace", "ignore"].join("|"); // lib/ShExJison.jison::addShape(label, shape)

// Option definitions shared by the argument parser (command-line-args) and the
// usage printer (command-line-usage). The usage printer reads each entry's
// `description`, so every documented option must use that exact key.
var CommandLineOptions = [
  { name: "verbose",   alias: "v", type: Boolean },
  { name: "quiet",     alias: "q", type: Boolean },
  { name: "query",                 type: Boolean },
  { name: "grep",                  type: Boolean },
  { name: "help",      alias: "h", type: Boolean },
  { name: "node",      alias: "n", type: String, typeLabel: "RDFTerm",   multiple: false, defaultValue: undefined, description: "node to validate" },
  { name: "node-type", alias: "t", type: String, typeLabel: "IRI",       multiple: false, defaultValue: undefined, description: "validate nodes of this type" },
  { name: "map",       alias: "m", type: String, typeLabel: "string",    multiple: false, defaultValue: undefined, description: "QueryMap" },
  { name: "mapURL",    alias: "M", type: String, typeLabel: "file|URL",  multiple: false, defaultValue: undefined, description: "QueryMap" },
  { name: "shape",     alias: "s", type: String, typeLabel: "IRI|Bnode", multiple: false, defaultValue: undefined, description: "shape to validate" },
  { name: "shex",      alias: "x", type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "ShExC schema" },
  { name: "json",      alias: "j", type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "ShExJ schema" },
  { name: "data",      alias: "d", type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "Turtle data", defaultOption: true },
  { name: "jsonld",    alias: "l", type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "JSON-LD data" },
  { name: "result",                type: String, typeLabel: "file|URL",  multiple: false, defaultValue: undefined, description: "expected JSON results" },
  { name: "serve",     alias: "S", type: String, typeLabel: "URL",       multiple: false, defaultValue: undefined, description: "server" },
  { name: "exec",                  type: String, typeLabel: "javascript code",            defaultValue: undefined, description: "run some js code with results" },
  { name: "duplicate-shape",       type: String, typeLabel: dupChoices,  multiple: false, defaultValue: undefined, description: "what to do with duplicate shapes" },
  { name: "or",                    type: String, typeLabel: orChoices,   multiple: false, defaultValue: undefined, description: "how '|' should be interpreted" },
  { name: "partition",             type: String, typeLabel: parChoices,  multiple: false, defaultValue: undefined, description: "how to explore a partition of triples into constraints" },
  { name: "json-manifest",         type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "JSON manifest" },
  { name: "turtle-manifest",       type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "Turtle manifest" },
  { name: "jsonld-manifest",       type: String, typeLabel: "file|URL",  multiple:  true, defaultValue:        [], description: "JSON-LD manifest" },
  { name: "test-name",             type: String, typeLabel: "IRI",       multiple:  true, defaultValue:        [], description: "what tests to run in the manifest" },
  // These two previously used the key `desc`, which the usage printer ignores.
  { name: "invocation",alias: "i", type: Boolean, description: "show invocation" },
  { name: "dry-run",   alias: "N", type: Boolean, description: "prepare validation but don't execute" },
  { name: "regex-module",          type: String, typeLabel: "js module", multiple: false, defaultValue: undefined, description: "what regular expression modules to use" },
];

// Generate command line interface.
// The parser is invoked right here and throws on unknown or malformed options,
// so this is the only place where such an error can be turned into a short
// message instead of an uncaught exception with a stack trace.
var CLI = (function () {
  try {
    return require("command-line-args")(CommandLineOptions);
  } catch (e) {
    console.error(e.message);
    process.exit(1);
  }
})();
// Path of this script and of the CLI test fixtures, both relative to the
// current working directory (CliDir is used in the usage examples).
var Argv1 = Path.relative("", process.argv[1]);
var CliDir = Path.relative("", Path.resolve(__dirname, "../test/cli")) + Path.sep;

/**
 * Print `msg` followed by the full usage text to stderr, then terminate the
 * process with exit status 1. Does not return.
 *
 * @param {string} msg - what went wrong ("" when called for --help).
 */
function abort (msg) {
  console.error(msg);
  console.error(require('command-line-usage')([
    {
      header: 'validate',
      content: 'validate nodes in RDF graphs with respect to ShEx schemas.'
    },
    {
      header: 'Synopsis',
      content: [
        "validate (-x <ShExC> | -j <ShExJ>) -s <start shape> (-d <Turtle> | -l <JSON-LD>) -n <start node>",
        "validate (-x |-j)? (-s)? (-d|-l)? (-n)? -S http://localhost:8088/validate"
      ]
    },
    {
      header: 'Options',
      optionList: CommandLineOptions
    },
    {
      // A single Examples section; it used to be emitted twice, the first copy
      // repeating only the "validate local files" entry.
      header: 'Examples',
      content: [
        { l: "validate local files:",
          r: Util.format('  validate -x %s1dotOr2dot.shex -s http://a.example/S1 -d %sp2p3.ttl -n x', CliDir, CliDir) },
        { l: "validate web resources:",
          r: "  validate -x http://shex.io/examples/Issue.shex -s IssueShape -d http://shex.io/examples/Issue1.ttl -n Issue1" },
        // Server mode is selected with -S (see Synopsis). No placeholder here,
        // so no Util.format: extra arguments would be appended to the text.
        { l: "run as a server:",
          r: "  validate -S http://localhost:8088/validate" },
        { l: "",
          r: Util.format('  curl -i http://localhost:8088/validate -F "schema=@%s1dotOr2dot.shex" -F "shape=http://a.example/S1" -F "data=@%sp2p3.ttl" -F "node=x"', CliDir, CliDir) },
        { l: "run as a preloaded server:",
          r: Util.format('  validate -x %s1dotOr2dot.shex -s http://a.example/S1 -S http://localhost:8088/validate', CliDir) },
        { l: "",
          r: Util.format('  curl -i http://localhost:8088/validate -F "data=@%sp2p3.ttl" -F "node=x"', CliDir) }
      ]
    },
    {
      content: "Project home: " + require('chalk').underline("https://github.com/shexSpec/shex.js")
    }
  ]));
  process.exit(1);
}

/* Leverage N3.js's relative IRI parsing.
 * !! requires intimate (so intimate it makes me blush) knowledge of N3.js.
 */
var N3 = require("n3");

/**
 * Work out which (node, shape) pairs to validate from the supplied parameters
 * and run the validator over them.
 *
 * @param {Object} loaded - loaded schema and data; this function reads
 *   loaded.schema, loaded.schemaMeta[0], loaded.data (queried through
 *   getTriplesByIRI and size) and loaded.dataMeta[0].
 * @param {Object} parms - may carry `node`, `shape`, `node-type` and `map`
 *   (an array of {node, shape} pairs; node may be a TriplePattern object).
 * @param {Object} options - handed to runValidator unchanged.
 * @returns {Object} `{ results }` holding whatever runValidator returned, plus
 *   `shape` when the schema's start shape was used.
 * @throws {Error} when the shape cannot be resolved or no focus node is found.
 */
function findNodesAndValidate (loaded, parms, options) {
  function knownShape (label) {
    return label in loaded.schema.shapes;
  }
  function unknownShape (label) {
    throw Error(label + " not found in\n" + Object.keys(loaded.schema.shapes).map(n => "  " + n + "\n").join(''));
  }
  function someShape () {
    return Object.keys(loaded.schema.shapes)[0];
  }
  function knownNode (label) {
    return (loaded.data.getTriplesByIRI(label, null, null).length > 0 ||
            loaded.data.getTriplesByIRI(null, null, label).length > 0);
  }
  function knownType (label) {
    return (loaded.data.getTriplesByIRI(null, RDF_TYPE, label).length > 0);
  }
  // Default focus node: the first IRI subject, else the first subject of any kind.
  function someIRInode () {
    var triples = loaded.data.getTriplesByIRI(null, null, null);
    for (var i = 0; i < triples.length; ++i)
      if (N3.Util.isIRI(triples[i].subject))
        return triples[i].subject;
    return triples.length > 0 ? triples[0].subject : ShExUtil.NotSupplied;
  }
  function allNodesWithType (type) {
    var triples = loaded.data.getTriplesByIRI(null, RDF_TYPE, type);
    return triples.length > 0 ? triples.map(t => t.subject) : [ShExUtil.NotSupplied];
  }
  // Evaluate a triple pattern; returns the terms in the FOCUS position.
  function getTriples (s, p, o) {
    var get = s === ShEx.ShapeMap.focus ? "subject" : "object";
    return loaded.data.getTriplesByIRI(mine(s), mine(p), mine(o)).map(
      t => t[get]
    );
    // FOCUS and wildcard both become "match anything" in the store query.
    function mine (term) {
      return term === ShEx.ShapeMap.focus || term === ShEx.ShapeMap.wildcard
        ? null
        : term;
    }
  }

  // Expand one (node, shape) request into a list of {node, shape} pairs.
  function _makeShapeMap (nodeP, shapeP, nodeTypeP) {
    // resolve relative shape and focus node names against the first schema and data source respectively.
    var nodes = nodeTypeP
        ? allNodesWithType(ShExUtil.parsePassedNode(nodeTypeP, loaded.dataMeta[0],
                                                    null, knownType, x =>  x))
        : typeof nodeP === "object" && nodeP.type === "TriplePattern"
        ? getTriples(nodeP.subject, nodeP.predicate, nodeP.object)
        : [ShExUtil.parsePassedNode(nodeP, loaded.dataMeta[0], someIRInode,
                                 MISC_NODES ? () => true : knownNode, x => x)]; // accept unknown nodes
    var shape, shapeLabel;

    if (!shapeP && loaded.schema.start) {
      shape = loaded.schema.start;
      shapeLabel = ShExValidator.start;
      ret.shape = shapeLabel;
    } else {
      var found = ShExUtil.parsePassedNode(shapeP, loaded.schemaMeta[0],
                                           someShape, knownShape, unknownShape);
      if (found === ShExUtil.NotSupplied || found === ShExUtil.UnknownIRI)
        throw Error("shape " + shapeP + " not defined" + optsStr(shapeP, loaded.schema.shapes));
      // shape = { type: "ShapeRef", reference: found };

      shape = found;
      shapeLabel = shapeP;
      if (shapeP === undefined)
        console.log("Guessing shape " + found);
    }

    if (nodes[0] === ShExUtil.NotSupplied)
      throw Error("node not defined and no default found" +
                  ("node-type" in parms ?
                   " looking for type " + nodeTypeP :
                   ""));
    if (!MISC_NODES && nodes[0] === ShExUtil.UnknownIRI) {
      console.warn("node " + nodeP + " not found in data" +
                   optsStr("node" in parms ? nodeP : nodes[0], loaded.data._entities));
      nodes = [nodeP];
    }

    // The schema lives on `loaded`; there is no free-standing `schema` variable
    // in this scope.
    var noNode = nodes[0] === undefined;
    var noShape = shape === undefined && !("start" in loaded.schema);
    if (noNode || noShape) {
      var msgs = [];
      if (noNode) {
        // Only enumerate candidate subjects when the graph is small.
        var subjectNodes = loaded.data.size < 50
            ? Object.keys(loaded.data.getTriplesByIRI(null, null, null).reduce(function (r, t) { r[t.subject] = t.subject; return r; }, {}))
            : [];
        msgs.push("No starting node specified" +
                  (subjectNodes.length ?
                   "; try -n with one of:\n" + subjectNodes.map(n => "  " + n + "\n").join("") :
                   "")
                 );
      }
      // Make sure we have a start shape.
      if (noShape) {
        var schemaKeys = Object.keys(loaded.schema.shapes || {});
        msgs.push("No shape specified on command line or in shex" +
                  (schemaKeys.length < 50 ?
                   "; try -s with one of: " + schemaKeys.join(", ") :
                   "")
                 );
      }
      abort(msgs.join("\n"));
    }
    return nodes.map(node => ({ node: node, shape: shape }));
  }

  var ret = {};

  // With a shape map, expand every entry and keep the first pair seen for each
  // node. NOTE(review): de-duplication is by node only, so a node paired with
  // two different shapes is validated against the first only -- confirm intent.
  var shapeMap = "map" in parms
      ? parms.map.reduce(
        (acc, pair) => acc.concat(_makeShapeMap(pair.node, pair.shape, null)), []
      ).reduce(
        (acc, pair) => acc.seen.indexOf(pair.node) === -1
          ? {ret: acc.ret.concat(pair), seen: acc.seen.concat(pair.node)}
        : acc,
        {ret: [], seen: []}
      ).ret
      : _makeShapeMap(parms.node, parms.shape, parms["node-type"]);
  if (cmds.verbose) {
    var tz = shapeMap.map(p => p.node + " AS " + p.shape);
    tz = tz.length > 1 ? JSON.stringify(tz) : tz[0];
    var w;
    new ShExWriter({simplifyParentheses: false }).
      writeSchema(loaded.schema, function (error, text, prefixes) {
        if (error) throw error;
        else w = text;
      });
    console.log("validating " + tz + " over " + loaded.data.size + " triples against " + w);
  }

  ret.results = runValidator(loaded.data, shapeMap, loaded.schema, options);

  // Suggest keys of `dict` that contain the last path segment of `key`
  // (case-insensitively); returns "" when there is nothing to suggest.
  function optsStr (key, dict) {
    if (typeof key !== "string")
      return ""; // e.g. no shape was supplied at all.
    var slash = key.lastIndexOf("/");
    var tail = slash === -1 ? key : key.substr(slash);
    // Escape regex metacharacters so IRIs containing ".", "?", "(" etc. are
    // matched literally and cannot make RegExp throw.
    var r = RegExp(tail.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
    var opts = Object.keys(dict || {}).reduce(function (ret, k) {
      if (k.match(r))
        ret.push(k);
      return ret;
    }, []);
    return opts.length === 0 ? "" :
      opts.length === 1 ? ": try " + opts[0] :
      ": try one of " + opts.join(" , ");
  }
  return ret;
}

/**
 * Load the schema(s) and data named in valParms, plus optional expected results
 * and shape map, then either start the server or validate.
 *
 * The returned promise resolves to:
 *   - the value produced by findNodesAndValidate(...).results when --dry-run is set,
 *   - 0 or 3 when expected results were supplied (3 means they did not match),
 *   - 0 or 2 otherwise (2 means validation failure),
 *   - 1 when anything in the chain throws,
 *   - undefined in server mode (that branch returns nothing).
 *
 * @param valParms - parameters such as shex, json, data, jsonld, result, map,
 *   mapURL, serve, shape and quiet; `data` and `map` may be rewritten here.
 * @param validatorOptions - passed through to validation or the server.
 * @param schemaOptions - passed to ShExLoader.load and on to the server.
 */
function loadSchemaAndData (valParms, validatorOptions, schemaOptions) {
  // With several schemas and no data, treat all but the first schema argument as data.
  if (valParms.shex.length > 1 && valParms.data.length === 0) {
    valParms.data = valParms.shex.splice(1); // push all but one into data
  }
  if (valParms.json.length > 1 && valParms.data.length === 0) {
    valParms.data = valParms.json.splice(1); // push all but one into data
  }
  // Schema and data are mandatory unless running as a server.
  if (valParms.data.length === 0 && valParms.jsonld.length === 0 && !valParms.serve) abort("No data specified");
  if (valParms.shex.length === 0 && valParms.json.length === 0 && !valParms.serve) abort("No shex specified"); 
  return Promise.all([
    // Loaded schema and data.
    ShExLoader.load(valParms.shex, valParms.json, valParms.data, valParms.jsonld, schemaOptions, undefined),

    // Expected results, if provided.
    !("result" in valParms) ? Promise.resolve(undefined) :
      typeof valParms.result === "boolean" ? Promise.resolve(valParms.result) :
      ShExLoader.GET(valParms.result).then(function (body) { 
        return ShExUtil.absolutizeResults(JSON.parse(body.text), body.url);
      }),

    // ShapeMap, if provided.
    "mapURL" in valParms ? ShExLoader.GET(valParms.mapURL).then(function (body) {
      return { shapeMapBody: body.text, shapeMapBase: valParms.mapURL };
    })
    : Promise.resolve("map" in valParms
                      ? { shapeMapBody: valParms.map,
                          shapeMapBase: "file://" + Path.resolve(process.cwd()) }
                      : undefined)
  ]).then(function (loadedAndResults) {
    /* loadedAndResults is something like: [
         { data: N3Store {…},
           dataMeta: [{
             base: "http://instance.example/",
             mediaType: "text/turtle",
             prefixes: {xsd: "http://www.w3.org/2001/XMLSchema#", …},
             url: "file:///…/shex.js/examples/ClinObs-with-birthdate.ttl"
           }],
           schema: {type: "Schema", …},
           schemaMeta: [{
             base: "http://schema.example/",
             mediaType: "text/shex",
             prefixes: {xsd: "http://www.w3.org/2001/XMLSchema#", …},
             url: "file:///…/shex.js/examples/ClinObs.shex"
           }]
         },
         { type: "ShapeTest", … },
         { shapeMapBody: '{node:"a", shape:"b"}' || "<a>@<b>"
           shapeMapBase: "http://shapeMap.example/"
         }
       ] */

    // Replace the loaded store with the wrapper returned by ShExUtil.makeN3DB.
    loadedAndResults[0].data =
      ShExUtil.makeN3DB(loadedAndResults[0].data);
    if (valParms.serve) {
      // Server mode: optionally sanity-check the preloaded shape, then serve.
      if (valParms.shape) {
        function knownShape (label) {
          return label in loadedAndResults[0].schema.shapes;
        }
        // NOTE(review): other parsePassedNode calls in this file pass a function
        // as the last argument; here it is the schema's prefixes -- confirm.
        var found = ShExUtil.parsePassedNode(valParms.shape, loadedAndResults[0].schemaMeta[0],
                                             null, knownShape, loadedAndResults[0].schema.prefixes);
        if (found === ShExUtil.UnknownIRI)
          console.warn("Warning: could not resolve "+valParms.shape+" with loaded schema");
      }
      runServer(loadedAndResults[0], valParms, validatorOptions, schemaOptions);
    } else {
      if (loadedAndResults[2]) { // if we loaded a map
        // Try the JSON form of a shape map first; if that throws, parse the
        // body with the ShapeMap parser instead.
        try {
          valParms.map = ShExUtil.absolutizeShapeMap(JSON.parse(loadedAndResults[2].shapeMapBody),
                                                     loadedAndResults[0].dataMeta[0].base); // @@ relative to data[0]'s base instead of loadedAndResults[2].shapeMapBase -- is that good?
        } catch (e) {
          valParms.map = ShEx.ShapeMapParser.construct(loadedAndResults[2].shapeMapBase,
                                                       loadedAndResults[0].schemaMeta[0],
                                                       loadedAndResults[0].dataMeta[0])
            .parse(loadedAndResults[2].shapeMapBody);
        }
      }

      var res = findNodesAndValidate(loadedAndResults[0], valParms, validatorOptions, schemaOptions).results;
      if (cmds["dry-run"])
        return res;
      // A result object without an "errors" key counts as a pass.
      var passed = !("errors" in res);
      var expectedResults = loadedAndResults[1];
      // display results and return error codes
      if (expectedResults !== undefined) {
        // A boolean expectation matches when it equals `passed`; anything else
        // is compared with the actual results by their JSON serialization.
        var match =
            typeof expectedResults === "boolean" && expectedResults !== !passed ||
            JSON.stringify(expectedResults) == JSON.stringify(res);
        if (!valParms.quiet)
          console.log(match);
        // Exit status 3 means match failure.
        return Promise.resolve(match ? 0 : 3);
      } else {
        if (cmds.query) {
          // --query: walk the result tree and print each tested triple once.
          var seen = {};
          function _dive1 (solns) {
            // NOTE(review): "ShapeOrResults" is tested twice below; one of the
            // two was possibly meant to be a different result type -- confirm.
            if (solns.type === "SolutionList" ||
                solns.type === "ShapeOrResults" ||
                solns.type === "ShapeOrResults") {
              solns.solutions.forEach(s => {
                _dive1(s.solution);
              });
            } else if (solns.type === "ShapeTest") {
              _dive1(solns.solution);
            } else if (solns.type === "OneOfSolutions" ||
                       solns.type === "EachOfSolutions") {
              solns.solutions.forEach(s => {
                _dive1(s);
              });
            } else if (solns.type === "OneOfSolution" ||
                       solns.type === "EachOfSolution") {
              solns.expressions.forEach(s => {
                _dive1(s);
              });
            } else if (solns.type === "TripleConstraintSolutions") {
              solns.solutions.map(s => {
                if (s.type !== "TestedTriple")
                  throw Error("unexpected result type: " + s.type);
                // Placeholder resolver: these base values are not expected to
                // prefix any real term, so terms are written out in full.
                var fakeResolver = {
                  _base: "_base",
                  _basePath: "_basePath",
                  meta:{prefixes:{}}
                };
                var triple = [
                  rdflib_termToLex(s.subject, fakeResolver),
                  rdflib_termToLex(s.predicate, fakeResolver),
                  rdflib_termToLex(s.object, fakeResolver),
                  "."
                ].join(" ");
                if (triple in seen) {
                  seen[triple]++; // in case we ever care how many times we skipped it.
                } else {
                  console.log(triple);
                  seen[triple] = 0;
                }
                // Follow a nested shape reference, if any.
                if ("referenced" in s) {
                  _dive1(s.referenced.solution);
                }
              });
            } else {
              throw Error("unexpected expr type "+solns.type+" in " + JSON.stringify(solns));
            }
          }

          // An empty result object is skipped rather than walked.
          if (Object.keys(res).length > 0)
            _dive1(res);
        } else if (!valParms.quiet)
          console.log(JSON.stringify(res, null, "  "));
        // Exit status 2 means validation failure.
        return Promise.resolve(passed ? 0 : 2);
      }
    }
  }).catch(function (e) {
    // Any failure above is reported here and mapped to status 1.
    console.error("aborting:", typeof e === "object" && "stack" in e ? e.stack : e);
    return 1;
  });

}

/**
 * Serialize an RDF term to a Turtle-like lexical form.
 *
 * @param {Object|string} node - either a literal object with `value` and
 *   optional `language` / `datatype`, or a string term.
 * @param {Object} resolver - supplies `_base`, `_basePath` and `meta.prefixes`
 *   used to relativize or abbreviate string terms.
 * @returns {string} the lexical form, e.g. `"text"@en`, `a`, `<>` or `<rel>`.
 */
function rdflib_termToLex (node, resolver) {
  if (typeof node === "object") {
    // Escape the backslash FIRST: doing it later would double the backslashes
    // introduced by the other escapes, and skipping it (as before) lets a
    // literal backslash be read back as the start of an escape sequence.
    var ret = node.value
        .replace(/\\/g, "\\\\")
        .replace(/"/g, "\\\"")
        .replace(/\r/g, "\\r")
        .replace(/\n/g, "\\n")
        .replace(/\t/g, "\\t");
    ret = "\"" + ret + "\"";
    if ("language" in node)
      ret = ret + "@" + node.language;
    if ("datatype" in node)
      ret = ret + "^^" + rdflib_termToLex(node.datatype, resolver);
    return ret;
  }
  // rdf:type is written with Turtle's "a" shorthand.
  if (node === "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    return "a";
  if (node === ShEx.Validator.start)
    return START_SHAPE_LABEL;
  if (node === resolver._base)
    return "<>";
  // Terms under the base IRI are written relative to it.
  if (node.indexOf(resolver._base) === 0/* &&
                                           ['#', '?'].indexOf(node.substr(resolver._base.length)) !== -1 */)
    return "<" + node.substr(resolver._base.length) + ">";
  if (node.indexOf(resolver._basePath) === 0 &&
      ['#', '?', '/', '\\'].indexOf(node.substr(resolver._basePath.length)) === -1)
    return "<" + node.substr(resolver._basePath.length) + ">";
  // Everything else is delegated to the N3 writer.
  return ShEx.N3.Writer({ prefixes:resolver.meta.prefixes || {} })._encodeObject(node);
}

// Extract user commands
var arg0 = process.argv[1];
/**
 * Rebuild a command line that would run the given test parameters.
 *
 * @param {Object} parms - carries the arrays `shex`, `json`, `data` and
 *   `jsonld`, and optionally `shape`, `node`, `mapURL`, `map`, `options`
 *   (camelCase keys become --dashed-flags) and `result`.
 * @returns {string} the pieces joined by single spaces; absent pieces are
 *   empty strings, so runs of spaces can appear.
 */
function getInvocation (parms) {
  // file: URLs are shown as paths relative to the working directory; every
  // value is wrapped in single quotes.
  function shortenFile (u) {
    var fileUrl = u.match(/file:\/\/(\/.*)$/);
    var shown = fileUrl ? Path.relative(process.cwd(), fileUrl[1]) : u;
    return "'" + shown + "'";
  }
  // "<flag> '<file>'" for each truthy entry, "" for the rest, space-joined.
  function fileArgs (flag, urls) {
    return urls.map(function (u) {
      return u ? flag + " " + shortenFile(u) : '';
    }).join(' ');
  }
  // "<flag> <value>" when the value is truthy, otherwise "".
  function plainArg (flag, value) {
    return value ? flag + " " + value : '';
  }

  var pieces = [
    Path.relative(process.cwd(), arg0),
    fileArgs("-x", parms.shex),
    fileArgs("-j", parms.json),
    fileArgs("-d", parms.data),
    fileArgs("-l", parms.jsonld),
    plainArg("-s", parms.shape),
    plainArg("-n", parms.node),
    plainArg("-M", parms.mapURL),
    plainArg("-m", parms.map)
  ];

  if ("options" in parms) {
    Object.keys(parms.options).forEach(function (key) { // e.g. --regex-module nfax-val-1err
      var dashed = key.replace(/([A-Z])/g, function (c) {
        return "-" + c.toLowerCase();
      });
      pieces.push("--" + dashed + " " + parms.options[key]);
    });
  }

  if ("result" in parms) {
    // results are booleans or filenames
    var expected = typeof parms.result === "string"
        ? shortenFile(parms.result)
        : parms.result;
    pieces.push("--result " + expected);
  }

  return pieces.join(' ');
}

// Parsed command-line options; read as a global by the functions in this file.
// NOTE(review): CLI already holds the parsed result by the time this runs, so
// `return CLI` cannot throw and the catch branch looks unreachable -- confirm.
var cmds = (function () {
  try {
    return CLI;
  } catch (e) {
    console.error(e.message);
    process.exit(1);
  }
})();

// --help: print the usage text and exit.
if (cmds.help)
  abort("");

// Only override the default when a value was actually supplied, so an absent
// option can never replace "abort" with undefined.
if (cmds["duplicate-shape"] !== undefined)
  SchemaOptions.duplicateShape = cmds["duplicate-shape"];

// --or and --partition must name one of the strategies the validator knows.
if (cmds.or) {
  if (!(cmds.or in ShExValidator.options.or))
    throw Error("unknown or option \"" + cmds.or + "\" - expected one of \"" + Object.keys(ShExValidator.options.or).join("\", \"") + "\".");
  ValidatorOptions.or = cmds.or;
}

if (cmds.partition) {
  if (!(cmds.partition in ShExValidator.options.partition))
    throw Error("unknown partition option \"" + cmds.partition + "\" - expected one of \"" + Object.keys(ShExValidator.options.partition).join("\", \"") + "\".");
  ValidatorOptions.partition = cmds.partition;
}

// Maps a manifest trait name to the validator option it selects a value for.
var TraitToOption = { eachOf: "or", oneOf: "or", someOf: "or", exhaustive: "partition", greedy: "partition" };

// Pick the input source (RDF manifest, JSON manifest or plain command line),
// run it, and arrange for the process to end with the resulting status.
(function dispatch () {
  // Apply the status when the process is about to exit on its own, rather than
  // exiting immediately.
  function exitLater (status) {
    process.on('exit', function() { process.exit(status); });
  }
  // Last-resort handler so a rejected chain is reported and yields status 1.
  function fail (e) {
    console.error("aborting:", e !== null && typeof e === "object" && "stack" in e ? e.stack : e);
    exitLater(1);
  }

  if (cmds["turtle-manifest"].length || cmds["jsonld-manifest"].length) {
    loadRDFmanifest(cmds).then(exitLater).catch(fail);
  } else if (cmds["json-manifest"].length) {
    Promise.all(loadJSONmanifest(cmds["json-manifest"])).then(function (results) {
      // exit with a 0 if all returned 0, else a failure code.
      exitLater(results.reduce(function (soFar, result) {
        return soFar ? soFar : result;
      }, 0));
    }).catch(fail);
  } else {
    loadCommandLine(cmds).then(exitLater).catch(fail);
  }
})();

/* queueTests - take a structure like
[ { json: [],
    validatorOptions: {},
    shex: [ 'file:///tmp/t/t.shex' ],
    shape: 'http://www.w3.org/fhir-rdf-shapes/MedicationOrder',
    data: [ 'file:///tmp/t/t.ttl' ],
    node: 'file:///tmp/t/MedicationOrder/12345-67' } ]
and some constructor args like {or: "someOf", partition: "exhaustive"}
*/
// Start every test at once and resolve to the first non-zero status, or to the
// last status (0 for an empty list) when every one of them is falsy.
function queueTests (tests, cmds) {
  var pending = tests.map(function (test) {
    // run each test and return the result (0 or 2, so far)
    if (cmds.invocation) {
      console.log(getInvocation(test));
    }
    var validatorOptions = test.validatorOptions;
    // NOTE(review): the option is declared as "regex-module", so
    // cmds["regexModule"] looks like it is always undefined here -- confirm
    // which value getRegexModule is meant to receive.
    if ("regexModule" in validatorOptions)
      validatorOptions.regexModule = getRegexModule(cmds["regexModule"]);
    var schemaOptions = Object.assign({}, SchemaOptions, test.schemaOptions || {});
    return loadSchemaAndData(test, validatorOptions, schemaOptions);
  });

  return Promise.all(pending).then(function (statuses) {
    // exit with a 0 if all returned 0, else a failure code.
    var status = 0;
    for (var i = 0; i < statuses.length; ++i) {
      if (!status)
        status = statuses[i];
    }
    return status;
  });
}

/**
 * Construct a validator for `schema`, register the loaded extensions and
 * validate `shapeMap` against `graph`.
 *
 * @param graph - the data to validate, passed to validator.validate.
 * @param {Array} shapeMap - list of {node, shape} pairs.
 * @param schema - the schema the validator is constructed from.
 * @param options - options for ShExValidator.construct.
 * @returns 0 when --dry-run is set; the value returned by the --exec code when
 *   that option is given; otherwise the validation (or --grep) results.
 */
function runValidator (graph, shapeMap, schema, options) {
  // prepare validator
  var validator = ShExValidator.construct(schema, options);
  var extensions = ShExLoader.loadExtensions();
  Object.keys(extensions).forEach(function (ext) {
    extensions[ext].register(validator);
  });

  if (cmds["dry-run"])
    return 0;

  // run validator
  var res = cmds.grep
      ? grep(graph, shapeMap)
      : validator.validate(graph, shapeMap);

  Object.keys(extensions).forEach(function (ext) {
    extensions[ext].done(validator);
  });

  // display results
  if (cmds.exec) {
    /* example usage:
       ./bin/validate -x test/Map/BPFHIR.shex -d test/Map/BPFHIR.ttl -n tag:BPfhir123 --exec '
       return require("../lib/ShExLoader").load(["test/Map/BPunitsDAM.shex"], [], []).
       then(function (loaded) {
       var db = require("../extensions/shex-map").materializer(loaded.schema).
       materialize(validator.semActHandler.results["http://shex.io/extensions/Map/#"], "tag:b0").
       getTriples(null, null, null);
       var w = require("n3").Writer({ prefixes: { map: "http://shex.io/extensions/Map/#" } });
       w.addTriples(db); w.end(function (error, result) { console.log(result); });
       });'
    */
    // SECURITY: this runs arbitrary JavaScript taken from the command line;
    // never feed it input from an untrusted source.
    // The parentheses are required: a bare "function (validator) {...}" is a
    // function statement without a name, which is a SyntaxError. The newline
    // before the closing brace keeps a trailing "//" comment in the user's
    // code from swallowing that brace.
    return eval("(function (validator) {\n" + cmds.exec + "\n})")(validator);
  } else {
    // NOTE(review): semantic action results are attached only when the result
    // carries "errors" (and --quiet is off); the header comment suggests they
    // adorn results in general -- confirm the condition is intended.
    if (!cmds.quiet)
      if ("errors" in res && Object.keys(validator.semActHandler.results).length) {
        res.semActResults = validator.semActHandler.results;
      }
    return res;
  }

  // --grep: validate each pair on its own and keep only the ones that pass.
  // Several passes are wrapped in a SolutionList, one is returned as is, and
  // none yields an empty object.
  function grep (graph, fixedMap) {
    var passes = fixedMap.reduce((acc, row) => {
      var res = validator.validate(graph, [row]);
      return "errors" in res
        ? acc
        : acc.concat(res);
    }, []);
    return passes.length > 1
      ? { type: "SolutionList", solutions: passes }
    : passes.length === 1
      ? passes[0]
      : {};
  }
}

// Load the Turtle / JSON-LD manifests named on the command line, turn each
// selected entry into a test description and run them through queueTests.
// Resolves to the combined status, or to 1 if anything in the chain fails.
function loadRDFmanifest (cmds) {

  // from a manifest supplied on the command line
  var MF = "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"; // mf: namespace in the manifest file.
  var SHT = "http://www.w3.org/ns/shacl/test-suite#";
  // [test parameter, sht: property on the action node, wrap the value in a list?]
  var ACTION_FIELDS = [
    ["shex"  , "schema", true ],
    ["shape" , "shape" , false],
    ["data"  , "data"  , true ],
    ["node"  , "focus" , false],
    ["mapURL", "map"   , false]
  ];

  return ShExLoader.load([], [], cmds["turtle-manifest"] || [], cmds["jsonld-manifest"] || [], SchemaOptions, undefined).then(function (loaded) {

    // Pick the test nodes: those whose mf:name matches a --test-name pattern,
    // or every node with an mf:action when no names were given.
    var testNodes;
    if (cmds["test-name"].length) {
      testNodes = [];
      cmds["test-name"].forEach(function (testName) {
        var pattern = RegExp(testName);
        loaded.data.getTriples(null, MF + "name", null).forEach(function (triple) {
          if (N3.Util.getLiteralValue(triple.object).match(pattern))
            testNodes.push(triple.subject);
        });
      });
    } else {
      testNodes = loaded.data.getTriples(null, MF + "action", null).map(function (t) {
        return t.subject;
      });
    }

    var tests = testNodes.map(function (subject) {
      var action = loaded.data.getTriples(subject, MF + "action", null)[0].object;
      var validatorOptions = Object.assign({}, ValidatorOptions);
      var schemaOptions = Object.assign({}, SchemaOptions);

      // Traits such as sht:Exhaustive select validator options by their
      // lower-camel-case local name.
      loaded.data.getTriples(subject, SHT + "trait", null).forEach(function (t) {
        var local = t.object.substr(SHT.length);
        var trait = local.substr(0, 1).toLowerCase() + local.substr(1);
        if (trait in TraitToOption)
          validatorOptions[TraitToOption[trait]] = trait;
      });

      var test = {json: [], jsonld: [],
                  validatorOptions: validatorOptions,
                  schemaOptions: schemaOptions}; // @@ no way to specify jsonld in manifest
      ACTION_FIELDS.forEach(function (field) {
        var parm = field[0];
        var fromCommandLine = cmds[parm];
        // Command line arguments override *each* manifest entry.
        if (fromCommandLine &&
            !(fromCommandLine.constructor === Array && fromCommandLine.length === 0)) {
          test[parm] = fromCommandLine;
          return;
        }
        var matches = loaded.data.getTriples(action, SHT + field[1], null);
        if (matches.length > 0) {
          var value = matches[0].object;
          test[parm] = field[2] ? [value] : value;
        }
      });
      return test;
    });
    return queueTests(tests, cmds);
  }).catch(function (e) {
    console.error("failed to load manifest file \"" +
                  ((cmds["turtle-manifest"] || []).concat(cmds["jsonld-manifest"] || [])) +
                  "\":", e, e.stack);
    return 1;
  });
}

/**
 * Load each JSON (or JSON-LD) manifest and queue the tests it describes.
 *
 * Accepted manifest layouts:
 *   - an array of test objects;
 *   - a JSON-LD manifest whose first "@graph" member carries an "entries" list;
 *   - a single test object, e.g.
 *     { shex: "t.shex", shape: "S", data: "t.ttl", "node": "s" }.
 *
 * Reads `cmds` from the enclosing scope for the --test-name filters and for
 * node/shape/map values which override every manifest entry.
 *
 * @param {string[]} jsonManifests - manifest locations handed to ShExLoader.GET.
 * @returns {Promise[]} one promise per manifest, resolving to whatever
 *   queueTests returns, or to 1 if the manifest can't be loaded or processed.
 */
function loadJSONmanifest (jsonManifests) {
  return jsonManifests.map(function (jm) {
    return ShExLoader.GET(jm).then(function (p) {
      try {
        var d = JSON.parse(p.text);

        // normalize the input
        if (d.constructor !== Array) {
          if ("@graph" in d) // Extract from the JSON-LD manifest format.
            d = d["@graph"][0].entries.map(function (t) {
              var options = Object.assign({}, ValidatorOptions);
              // "trait" may be absent (or a lone string); normalize it to a
              // list so an entry without traits doesn't fail the whole manifest.
              [].concat(t.trait || []).forEach(function (tStr) {
                tStr = tStr.substr(0, 1).toLowerCase() + tStr.substr(1);
                if (tStr in TraitToOption)
                  options[TraitToOption[tStr]] = tStr;
              });
              return Object.assign(
                { name: t.name,
                  shex: t.action.schema,
                  data: t.action.data,
                  options: options },
                "shape" in t.action ? {shape: t.action.shape} : {},
                "focus" in t.action ? {node: t.action.focus} : {},
                "map" in t.action ? {map: t.action.map} : {}
              );
            });
          else // Hopefully an object like { shex: "t.shex" , shape: "S", data: "t.ttl", "node": "s" }
            d = [d];
        }

        var testNames = cmds["test-name"] || [];
        if (testNames.length) { // Include only tests listed in --test-name.
          // Compile each pattern once rather than once per manifest entry.
          var patterns = testNames.map(function (n) { return RegExp(n); });
          d = d.filter(function (elt) {
            return "name" in elt && patterns.some(function (pattern) {
              return elt.name.match(pattern);
            });
          });
        }

        d.forEach(function (elt) {
          // FileURL parameters are lists of URLs relative to the manifest file.
          ["shex", "json", "data", "jsonld"].forEach(function (attr) {
            if (!(attr in elt))
              elt[attr] = [];
            else if (elt[attr].constructor !== Array)
              elt[attr] = [elt[attr]];
            elt[attr] = elt[attr].map(function (url) {
              return ShExUtil.resolveRelativeIRI(p.url, url); // note that these don't permit prefixes (for now).
            });
          });

          // Command line arguments override *each* manifest entry.
          ["node", "shape", "map"].forEach(function (attr) {
            if (attr in cmds)
              elt[attr] = cmds[attr];
          });
          elt.schemaOptions = Object.assign({}, SchemaOptions);
          if ("options" in elt) {
            elt.validatorOptions = elt.options;
            delete elt.options;
          } else {
            // Copy so that no test shares (and can alter) the global defaults.
            elt.validatorOptions = Object.assign({}, ValidatorOptions);
          }
          if ("result" in elt && elt.result !== null)
            elt.result = ShExUtil.resolveRelativeIRI(p.url, elt.result);
          elt.jsonld = []; // @@ no way to specify jsonld in manifest
        });
        return queueTests(d, cmds);
      } catch (e) {
        console.error("failed to process json manifest: " + (e.stack || e));
        return 1;
      }
    }).catch(function (e) {
      console.error("failed to load json manifest: ", e.stack || e);
      return 1;
    });
  });
}

function loadCommandLine (cmds) {
  // from the command line directly

  // coerce booleans (no files called "true" or "false", please).
  if ("result" in cmds) {
    if (cmds.result === "true") cmds.result = true;
    else if (cmds.result === "false") cmds.result = false;
  }
  if (cmds.invocation) {
    console.log(getInvocation(cmds));
  }
  if ("regex-module" in cmds) {
    if (cmds["regex-module"] === "?") {
      // "--regex-module ?" lists the known regular expression modules.
      return new Promise(function (resolve, reject) {
        require('fs').readdir(Path.join(__dirname, REGEX_MODULES), (err, fns) => {
          if (err) throw Error(err);
          fns.forEach(fn => {
            if (fn.endsWith(".js")) {
              try {
                var m = require(REGEX_MODULES + fn);
                console.log("  --regex-module " + m.name + " -- " + m.description);
              } catch (e) {  }
            }
          });
          resolve(0);
        });
      });
    } else {
      ValidatorOptions.regexModule = getRegexModule(cmds["regex-module"]);
    }
  }
  return loadSchemaAndData(cmds, ValidatorOptions, SchemaOptions);
}

function runServer (serverLoaded, serverParms, serverOptions) {
  var match = serverParms.serve.match(/^(?:http:\/\/)?([a-zA-Z0-9.]+)(?::([0-9]+))?(\/.*)/);
  if (!match)
    throw Error("server parameter expected to be a URL");

  var log     = console.log,
      app     = require("koa")(),
      koaBody = require("koa-body"),
      port    = match[2] || 80,
      host    = "http://" + match[1],
      srvpath = match[3],
      fs      = require("fs");

  app.
    use(koaBody({
      multipart: true,
      formLimit: 15,
      formidable: {
        uploadDir: Path.resolve(__dirname, "../rest/uploads")
      }
    })).
    use(function *(next) {
      switch (this.originalUrl) {
      case srvpath:
        var parms = {
          // untested! schemaName:undefined, schemaFile:undefined, shape:undefined,
          // untested! dataName:undefined,   dataFile:undefined,   node:undefined, nodeType:undefined
          schemaName:undefined, schemaFile:undefined, shape:undefined,
          dataName:undefined,   dataFile:undefined,   node:undefined, nodeType:undefined
        };
        if (this.request.method !== "POST")
          this.throw(500, "only supports POST now");
        var body = this.request.body;
        function makeArray (key) {
          if (!(key in body.files))
            return [];
          return (body.files[key].constructor === Array ?
                  body.files[key] :
                  [body.files[key]]
                 ).map(pair => {
                   return pair.path; // should ShExLoader accept [.name, .path]?
                 });
        }
        var schemaFiles = makeArray("schema");
        parms.shape      = body.fields.shape; // untested! body.fields.shape
        var dataFiles = makeArray("data");
        parms.node      = body.fields.node; // untested! body.fields.node
        parms["node-type"] = body.fields.nodeType;

        // Default to serverParms
        Object.keys(parms).forEach(k => {
          if (parms[k] === undefined && k in serverParms)
            parms[k] = serverParms[k];
        });
        var options = serverOptions;
        if ("regex-module" in parms)
          options = extend(options, {
            regexModule: require(REGEX_MODULES + parms["regex-module"])
          });

        var httpResponse = this;
        yield ShExLoader.
          load(schemaFiles, [], dataFiles, [], SchemaOptions, undefined).
          then(function (loaded) {
            // add server-loaded (meta)data
            serverLoaded.data.getTriplesByIRI().forEach(t => { loaded.data.addTriple(t); });
            loaded.data = ShExUtil.makeN3DB(loaded.data);
            loaded.dataMeta = loaded.dataMeta.concat(serverLoaded.dataMeta);

            // add server-loaded schema
            ShExUtil.merge(loaded.schema, serverLoaded.schema, false, true);
            loaded.schemaMeta = loaded.schemaMeta.concat(serverLoaded.schemaMeta);

            var result = findNodesAndValidate(loaded, parms, options);
            if ("results" in result)
              result = result.results; // otherwise just report the invoked parms.
            if (body.fields.output === "html") {
              httpResponse.body = Object.keys(parms).reduce((r, p) => {
                return r.replace("["+p+"]", parms[p]);
              }, fs.readFileSync("./validate.template", "utf-8")).
                replace(/\[result\]/, JSON.stringify(result, null, 2));
            } else {
              httpResponse.body = JSON.stringify(result, null, 2)+"\n";
            }
          }).then(function () {
            schemaFiles.forEach(path => { fs.unlink(path); });
            dataFiles.forEach(path => { fs.unlink(path); });
            return next;
          }).catch(e => {
            var stack = typeof e === "object" && "stack" in e ? e.stack : e;
            var msg = typeof e === "object" && "message" in e ? e.message : e;
            console.warn(stack);
            httpResponse.body = {
              type: "ParsingError",
              errors: [msg]
            };
            httpResponse.status = 500;
            schemaFiles.forEach(path => { fs.unlink(path); });
            dataFiles.forEach(path => { fs.unlink(path); });
          });
        break;
      case "/":
        this.body = fs.readFileSync(Path.resolve(__dirname, "../rest/index.html"), "utf-8");
        break;
      default:
        this.throw(404, "whazzat?");
      }
    }).
    listen(port);

  log('Visit %s:%s/ in browser.', host, port);
  log();
  log('Test with a supplied schema and data:');
  log('  curl -i %s:%s%s -F "schema=@%s1dotOr2dot.shex" -F "shape=http://a.example/S1" -F "data=@%sp2p3.ttl" -F "node=x"', host, port, srvpath, CliDir, CliDir);
  log('or preload the schema and just supply the data:');
  log('  %s -x %s1dotOr2dot.shex -s http://a.example/S1 -S ', Argv1, CliDir, serverParms.serve);
  log('and pass only the data parameters:');
  log('  curl -i %s:%s%s -F "data=@%sp2p3.ttl" -F "node=x"', host, port, srvpath, CliDir);
  log('Note that shape and node can be relative or prefixed URLs.');
  log();
  log('Press CTRL+C to stop...');
}

function getRegexModule (module) {
  if (module) {
    try {
      return require(module);
    } catch (e1) {
      try {
        return require(REGEX_MODULES + module);
      } catch (e2) {
        throw e1;
      }
    }
  }
}

/**
 * Copy the enumerable properties (own and inherited) of every additional
 * argument onto `base`, in argument order, so later arguments win.
 *
 * @param {Object} [base] - object to extend in place; a falsy value is
 *   replaced by a new empty object.
 * @returns {Object} the extended object.
 */
function extend (base, ...sources) {
  const target = base || {};
  sources.forEach(function (source) {
    const from = source || {}; // falsy arguments contribute nothing
    for (const key in from)
      target[key] = from[key];
  });
  return target;
}
back to top