swh:1:snp:7d9f1bc35e120776766db9334626062f837c20ad
Raw File
Tip revision: 4551266dc38b9231e60a64700d1004dcdb47ee45 authored by Duncan Temple Lang on 25 April 2007, 00:00:00 UTC
version 1.7-3
Tip revision: 4551266
EventParse.c

/*
 File that provides the entry point for an event driven XML parser
 that performs callbacks to the different user-level functions in
 the closure passed to it.

 * See Copyright for the license status of this software.

 */

#include "EventParse.h"
#include "Utils.h" /* For the findFunction and invokeFunction. */

#include "RSCommon.h"

/* Explicit declarations of the R routines used below to register and unregister
   the parser's state object while handler functions are being called. */
extern void R_PreserveObject(SEXP);
extern void R_ReleaseObject(SEXP);

/* Forward declaration: updateState() is defined later in this file and is
   called from RS_XML(callUserFunction). */
static void updateState(USER_OBJECT_ val, RS_XMLParserData *parserData);

/*
  Read the specified file as an XML document and invoke functions/methods in
  the handlers closure object when each node in the tree is encountered by 
  the parser. These events are startElement, endElement, character data, etc.
  The remaining arguments control how the calls to the user level functions
  are made. The first (addContext) indicates whether information about the position
  in the tree (an integer index path) is to be included; its value is stored in
  the parser data as addContextInfo.

 */

/* Boolean type used as the return type of IsConnection() below. */
typedef Rboolean Sboolean;

/*
  Determine whether "connection" appears among the elements of the class
  attribute of obj.  Returns TRUE if it does, and FALSE otherwise (which
  includes the case where the class vector is empty).
 */
Sboolean
IsConnection(USER_OBJECT_ obj)
{
   USER_OBJECT_ classes = GET_CLASS(obj);
   int numClasses = GET_LENGTH(classes);
   int idx = 0;

   /* An empty class vector simply means the loop body never runs. */
   while(idx < numClasses) {
      if(strcmp("connection", CHAR_DEREF(STRING_ELT(classes, idx))) == 0)
         return(TRUE);
      idx++;
   }

   return(FALSE);
}


/*
  Entry point for the event-driven parse.

  fileName         the XML source. If it is a connection object or a function,
                   it is treated as a connection (RS_XML_CONNECTION); otherwise
                   asText selects between in-memory text (RS_XML_TEXT) and a
                   file name (RS_XML_FILENAME).
  handlers         the collection of user-level handler functions; stored in the
                   parser data and returned when there is no state object.
  addContext, ignoreBlanks, useTagName, trim, useDotNames
                   logical flags copied into the parser data.
  useExpat         only consulted when compiled with LIBEXPAT.
  stateObject      optional state object; NULL_USER_OBJECT means "none".
  replaceEntities  referenced only by the disabled (#if 0) code below.
  validate         not used in this routine.
  saxVersion       integer passed on to RS_XML(libXMLEventParse).
  branches         stored in the parser data.

  Returns the final state object if there is one, and handlers otherwise.

  Ownership: the string stored in parserData->fileName is always allocated with
  strdup() in this routine and is freed exactly once, just before returning.
 */
USER_OBJECT_ 
RS_XML(Parse)(USER_OBJECT_ fileName, USER_OBJECT_ handlers, USER_OBJECT_ addContext, 
               USER_OBJECT_ ignoreBlanks,  USER_OBJECT_ useTagName, USER_OBJECT_ asText,
                 USER_OBJECT_ trim, USER_OBJECT_ useExpat, USER_OBJECT_ stateObject,
                  USER_OBJECT_ replaceEntities, USER_OBJECT_ validate, USER_OBJECT_ saxVersion,
   	           USER_OBJECT_ branches, USER_OBJECT_ useDotNames)
{
#ifdef LIBEXPAT
  FILE *file = NULL;
  int expat = 0;
#endif
  char *name = NULL, *input = NULL;
  RS_XML_ContentSourceType asTextBuffer;
  RS_XMLParserData *parserData;
  USER_OBJECT_ ans;


  if(IsConnection(fileName) || isFunction(fileName))
     asTextBuffer = RS_XML_CONNECTION;
  else 
     asTextBuffer = LOGICAL_DATA(asText)[0] ? RS_XML_TEXT : RS_XML_FILENAME;

#ifdef LIBEXPAT
  expat = LOGICAL_DATA(useExpat)[0];
  /* NOTE(review): 0 is presumably RS_XML_FILENAME - confirm against the enum declaration. */
  if(expat && asTextBuffer == 0) {
    const char *path;
#ifdef USE_R
    path = R_ExpandFileName(CHAR(STRING(fileName)[0]));
#else
    path = CHARACTER_DATA(fileName)[0];
#endif
    file = fopen(path, "r");
    if(file == NULL) {
      PROBLEM "Can't find file %s", path
      ERROR;
    }
    /* path is not ours to free, so keep a private heap copy; this lets the
       single free() of parserData->fileName at the end be valid on every path. */
    name = strdup(path);
  } else
#endif /* ifdef LIBEXPAT */


  if(asTextBuffer == RS_XML_CONNECTION) {
    name = strdup("<connection>");
    input = (char *)fileName;/*XXX*/
  } else {
    name = strdup(CHAR_DEREF(STRING_ELT(fileName, 0)));
    input = name;
  }

  if(name == NULL) {
#ifdef LIBEXPAT
    if(file)
      fclose(file);
#endif
    PROBLEM "Cannot allocate memory for the name of the XML source"
    ERROR;
  }

  parserData = RS_XML(createParserData)(handlers);
  parserData->branches         = branches;
  parserData->fileName         = name; 
  parserData->callByTagName    = LOGICAL_DATA(useTagName)[0]; 
  parserData->addContextInfo   = LOGICAL_DATA(addContext)[0]; 
  parserData->trim             = LOGICAL_DATA(trim)[0]; 
  parserData->ignoreBlankLines = LOGICAL_DATA(ignoreBlanks)[0]; 
  parserData->stateObject = (stateObject == NULL_USER_OBJECT ? NULL : stateObject);
  parserData->useDotNames = LOGICAL_DATA(useDotNames)[0];

  /*Is this necessary? Shouldn't it already be protected? Or is there a chance that we may 
    be doing this asynchronously in a pull approach. */
  if(parserData->stateObject && parserData->stateObject != NULL_USER_OBJECT)
    R_PreserveObject(parserData->stateObject);


#ifdef LIBEXPAT
  if(expat) {
      if(asTextBuffer == 0) {
     	RS_XML(parseWithParserData)(file, parserData);
     	/* NOTE(review): file is not closed here. Whether RS_XML(parseWithParserData)
     	   closes it cannot be determined from this file - confirm, and fclose() it
     	   here if it does not. */
      } else {
     	/* Report the source as "<buffer>" only while parsing, then put the heap
     	   copy back so that the free() below never sees the string literal. */
     	parserData->fileName = "<buffer>"; 
     	RS_XML(parseBufferWithParserData)(name, parserData);
     	parserData->fileName = name;
      }
  } else 
#endif /* ifdef LIBEXPAT */

  /* Careful: under LIBEXPAT the statement following this disabled section is the
     body of the "else" above. Enabling the section as written would change that. */
#if 0
    /* If one wants entities expanded directly and to appear as text.  */
  if(LOGICAL_DATA(replaceEntities)[0]) 
      xmlSubstituteEntitiesDefault(1);   
#endif

  RS_XML(libXMLEventParse)(input, parserData, asTextBuffer, INTEGER_DATA(saxVersion)[0]);

  ans = parserData->stateObject ? parserData->stateObject : handlers;
  free(parserData->fileName); /* matches the strdup() of name above */

  if(parserData->stateObject && parserData->stateObject != NULL_USER_OBJECT)
     R_ReleaseObject(parserData->stateObject);

  free(parserData);

  return(ans);
}



/**
Handler that receives declarations of unparsed entities. These are entity declarations that have a notation (NDATA) field: 

                  <!ENTITY logo SYSTEM "images/logo.gif" NDATA gif>
*/
void 
RS_XML(entityDeclarationHandler)(void *userData, const XML_Char *entityName, 
                                 const XML_Char *base, const XML_Char *systemId, 
                                   const XML_Char *publicId, const XML_Char *notationName)
{
  RS_XMLParserData *parserData = (RS_XMLParserData*)userData;
  USER_OBJECT_ opArgs;
  int i, num;
  const XML_Char *xml_args[5];

  num = sizeof(xml_args)/sizeof(xml_args[0]);

  /* The order here fixes the order of the arguments passed to the user's function. */
  xml_args[0] = entityName; xml_args[1] = base;
  xml_args[2] = systemId; xml_args[3] = publicId;
  xml_args[4] = notationName;

  /* The list must be protected: each element is created by a further allocation,
     which could otherwise cause the list to be garbage collected. The other
     handlers in this file protect their argument lists in the same way. */
  PROTECT(opArgs = NEW_LIST(num));
  for(i = 0; i < num; i++) {
    SET_VECTOR_ELT(opArgs, i, NEW_CHARACTER(1));
    /* A missing (NULL) field is passed to the user's function as the empty string. */
    SET_STRING_ELT(VECTOR_ELT(opArgs, i), 0, COPY_TO_USER_STRING(xml_args[i] ? xml_args[i] : "")); 
  }

  RS_XML(callUserFunction)(HANDLER_FUN_NAME(parserData, "entityDeclaration"), 
                           (const char*)NULL, parserData, opArgs);
  UNPROTECT(1);
}


/*
  Handler for the start of an element.
  If R_isBranch() identifies the element as a branch (returns something other
  than -1), the element is handed to R_processBranch() and no handler is called.
  Otherwise the "startElement" handler is invoked via RS_XML(callUserFunction)
  with two arguments: the element name and its attributes (see
  RS_XML(createAttributesList)). The element name is also given as the
  preferred handler name.
 */
void 
RS_XML(startElement)(void *userData, const char *name, const char **atts)
{
  RS_XMLParserData *parserData = (RS_XMLParserData*) userData;
  USER_OBJECT_ args, tag;
  int branchIndex = R_isBranch(name, parserData);

  if(branchIndex != -1) {
      R_processBranch(parserData, branchIndex, name, NULL, NULL, 0, NULL, 0, 0, atts);
      return;
  }

  PROTECT(args = NEW_LIST(2));

  /* First argument: the element name as a character vector of length 1.
     It is stored in the protected list before the string itself is created. */
  tag = NEW_CHARACTER(1);
  SET_VECTOR_ELT(args, 0, tag);
  SET_STRING_ELT(tag, 0, COPY_TO_USER_STRING(name)); 

  /* Second argument: the attributes as a named character vector. */
  SET_VECTOR_ELT(args, 1, RS_XML(createAttributesList)(atts));

  RS_XML(callUserFunction)(HANDLER_FUN_NAME(parserData, "startElement"), name, parserData, args);
  UNPROTECT(1);
}

/*
  Handler for an XML comment: invokes the "comment" handler with a single
  argument, the text of the comment as a character vector of length 1.
 */
void 
RS_XML(commentHandler)(void *userData, const XML_Char *data)
{
  RS_XMLParserData *parserData = (RS_XMLParserData *) userData;
  USER_OBJECT_ args, text;

  PROTECT(args = NEW_LIST(1));

  /* Attach the vector to the protected list before creating the string. */
  text = NEW_CHARACTER(1);
  SET_VECTOR_ELT(args, 0, text);
  SET_STRING_ELT(text, 0, COPY_TO_USER_STRING(data));

  RS_XML(callUserFunction)(HANDLER_FUN_NAME(parserData, "comment"), 
                           (const char *)NULL, parserData, args);
  UNPROTECT(1);
}


USER_OBJECT_ 
RS_XML(createAttributesList)(const char **atts) 
{
  int n=0, i;
  const char **ptr = atts;
  USER_OBJECT_ attr_names;
  USER_OBJECT_ attr_values;
  while(ptr && ptr[0]) {
    n++;
    ptr += 2;
  }
 
  if(n < 1)
    return(NULL_USER_OBJECT);

  PROTECT(attr_values = NEW_CHARACTER(n));
  PROTECT(attr_names = NEW_CHARACTER(n));
     ptr = atts;
     for(i=0; i < n; i++, ptr+=2) {
      SET_STRING_ELT(attr_values, i, COPY_TO_USER_STRING(ptr[1]));
      SET_STRING_ELT(attr_names, i,  COPY_TO_USER_STRING(ptr[0]));
     }
    SET_NAMES(attr_values, attr_names);
  UNPROTECT(2);

  return(attr_values);
}

void RS_XML(endElement)(void *userData, const char *name)
{
 USER_OBJECT_ opArgs;
 RS_XMLParserData *rinfo = (RS_XMLParserData *) userData;

 if(rinfo->current) {
     R_endBranch(rinfo, name, NULL, NULL);
      return;
 }

 ((RS_XMLParserData*)userData)->depth++;

  PROTECT(opArgs = NEW_LIST(1));
  SET_VECTOR_ELT(opArgs, 0, NEW_CHARACTER(1));
     SET_STRING_ELT(VECTOR_ELT(opArgs, 0), 0, COPY_TO_USER_STRING(name));

     RS_XML(callUserFunction)(HANDLER_FUN_NAME(rinfo, "endElement"), NULL, ((RS_XMLParserData*) userData), opArgs);
  UNPROTECT(1);

}

/**
 Called for processing instructions, i.e. inline expressions of the form
  <?target data-text?>
 such as 
  <?R plot(1:10)?>
*/
void 
RS_XML(processingInstructionHandler)(void *userData, const XML_Char *target, const XML_Char *data) 
{
  RS_XMLParserData *parserData = (RS_XMLParserData *) userData;
  USER_OBJECT_ opArgs;

  /* The handler receives two character vectors of length 1: the target and the data. */
  PROTECT(opArgs = NEW_LIST(2));

  /* A NULL target or data is passed on as the empty string rather than being
     handed to COPY_TO_USER_STRING; RS_XML(entityDeclarationHandler) treats its
     NULL fields the same way.
     NOTE(review): a parser may report a processing instruction that has no
     data with a NULL pointer - confirm for the parsers in use. */
  SET_VECTOR_ELT(opArgs, 0, NEW_CHARACTER(1));
  SET_STRING_ELT(VECTOR_ELT(opArgs, 0), 0, COPY_TO_USER_STRING(target ? target : ""));
  SET_VECTOR_ELT(opArgs, 1, NEW_CHARACTER(1));
  SET_STRING_ELT(VECTOR_ELT(opArgs, 1), 0, COPY_TO_USER_STRING(data ? data : ""));

  RS_XML(callUserFunction)(HANDLER_FUN_NAME(parserData, "processingInstruction"), 
                           (const char *)NULL, parserData, opArgs);
  UNPROTECT(1);
}

void 
RS_XML(startCdataSectionHandler)(void *userData) 
{
}

void 
RS_XML(endCdataSectionHandler)(void *userData) 
{
}



void 
RS_XML(textHandler)(void *userData,  const XML_Char *s, int len)
{
 char *tmpString, *tmp;
 USER_OBJECT_ opArgs = NULL;
 RS_XMLParserData *parserData = (RS_XMLParserData*)userData; 

  if(parserData->current) {
      xmlChar *tmp = (xmlChar *) S_alloc((len + 1), sizeof(xmlChar));
      memcpy(tmp, s, len); tmp[len] = '\0';
      xmlAddChild(parserData->current, xmlNewText(tmp));
      return;
  }
  /* Last case handles ignoring the new line between the two nodes if trim is TRUE.
     <abc/>
     <next>
     */
  if(s == (XML_Char*)NULL || s[0] == (XML_Char)NULL || len == 0 || (len == 1 && s[0] == '\n' && parserData->trim))
    return;

           /* 1 more than length so we can put a \0 on the end. */
    tmp = tmpString = (char*)calloc(len+1, sizeof(char));
    strncpy(tmpString, s, len);
 
    if(parserData->trim) {
      tmpString = trim(tmpString);
      len = strlen(tmpString);
    }

  if(len > 0 || parserData->ignoreBlankLines == 0 ) {
    PROTECT(opArgs = NEW_LIST(1));
     SET_VECTOR_ELT(opArgs, 0, NEW_CHARACTER(1));
     SET_STRING_ELT(VECTOR_ELT(opArgs, 0), 0, COPY_TO_USER_STRING(tmpString));
  }

  free(tmp);

    /* If we are ignoring blanks and the potentiall newly computed length is non-zero, then
       call the user function.
     */

  if(opArgs != NULL) {
      RS_XML(callUserFunction)(HANDLER_FUN_NAME(parserData, "text"), (const char *)NULL, ((RS_XMLParserData*) userData), opArgs);
     UNPROTECT(1);
  }
}


int
RS_XML(notStandAloneHandler)(void *userData)
{
  /*  printf("In NotStandalone handler\n"); */
 return(1);
}


/**
  Create the parser data, which contains the
  collection of functions to call for each
  event type.

  This allocates the parser data using calloc.
  The caller should arrange to free it.
*/
RS_XMLParserData *
RS_XML(createParserData)(USER_OBJECT_ handlers) 
{
  /* calloc() zero-fills the structure, so every field other than methods
     starts out as 0/NULL. */
  RS_XMLParserData *parser = calloc(1, sizeof *parser);

  /* Raise an error rather than dereferencing NULL if the allocation failed. */
  if(parser == NULL) {
    PROBLEM "Cannot allocate memory for the XML parser data"
    ERROR;
  }

  parser->methods = handlers;

  return(parser);
}

/**
  Routine that locates and invokes the R function in the collection of handlers.

   opName is the identifier for the generic operation, i.e. startElement, text, etc.
   preferredName is the identifier for the node.
   
*/
USER_OBJECT_
RS_XML(callUserFunction)(char *opName, const char *preferredName, RS_XMLParserData *parserData, USER_OBJECT_ opArgs) 
{
  USER_OBJECT_ handlers = parserData->methods;
  USER_OBJECT_ handler = NULL;
  USER_OBJECT_ result;

  /* A handler named after the node takes precedence, but is only looked for
     when calling by tag name is enabled and a name was supplied. */
  if(preferredName != NULL && parserData->callByTagName)
    handler = RS_XML(findFunction)(preferredName, handlers);

  /* Otherwise use the handler for the generic operation. */
  if(handler == NULL)
    handler = RS_XML(findFunction)(opName, handlers);

  /* Nothing usable was found, so there is nothing to call.
     (Open question from the original author: should a generic handler that is
     an instance of "AsIs" also be skipped here?) */
  if(handler == NULL || !isFunction(handler))
    return(NULL_USER_OBJECT);

  /* Call the handler, passing the current state object, and record the value
     it returns as the new state (see updateState()). */
  result = RS_XML(invokeFunction)(handler, opArgs, parserData->stateObject);
  updateState(result, parserData);

  return(result); 
}

/*
  Replace the state object stored in parserData with val.
  This does nothing when no state object is in use, i.e. when the stored one
  is NULL or NULL_USER_OBJECT. Otherwise the old state object is released,
  val is preserved in its place, and val becomes the stored state object.
 */
void
updateState(USER_OBJECT_ val, RS_XMLParserData *parserData)
{
    USER_OBJECT_ previous = parserData->stateObject;

    if(previous != NULL && previous != NULL_USER_OBJECT) {
#ifdef _R_
        R_ReleaseObject(previous);
        R_PreserveObject(val);
#else
        decr_ref_count(previous, TRUE, Local_data, S_evaluator);
        incr_ref_count(val, TRUE, Local_data, S_evaluator);
#endif
        parserData->stateObject = val;
    }
}
back to top