C++ and XML...

Rob Wehrli plug-devel@lists.PLUG.phoenix.az.us
Tue Jul 17 13:47:00 2001


Kit Plummer wrote:
> 

Here is a very quick example in C that can easily be extended to produce
results useful in prototyping your application.  It uses expat, which is
probably the easiest to use C-based, non-validating parser available. 
Expat is extremely fast and great for quickly parsing even huge XML
files.  (I've easily parsed 180MB files in a few seconds.)

You can find expat at: http://www.jclark.com/xml/expat.html

It compiles easily for Win32 and Linux.  Note that you'll have to
produce your own Linux library as the default installation doesn't do it
for you.  I think that I simply copied all of its .o files to a "lib"
directory and used ar to create libexpat.a (as is used in the Makefile
example below).

You'll note that I didn't get far enough into it to put a lot of
comments into it.  You can see the commented-out handler function calls
that are not implemented, but easily could be, given more time and
perhaps a purpose... :)  Note how the "Response" elements' data are not
being reported, as no element-specific handler is assigned (the generic
ElementData handler copies the text, but its output calls are commented
out).  I could easily add the handler function to output it, but
something more than just writing it to screen should be done...such as
a function call to a TTS engine.

You'll obviously note that this code simply parses the XML data file and
does none of the application-specific work, but what else would you
expect for a 20 minute hack?!  :)  Seriously, there would be a useful
bit of code written that would use the functions implemented here to be
a real application, but this should give you an idea of at least one way
to successfully use Linux to rapidly parse through XML and handle it as
desired.  There are at least 50 different ways to handle element
recognition and branching that I didn't implement here for brevity and
simplicity, but I'd definitely recommend them for a "full-featured"
application...however, at the same time, with just a few elements that
are not likely to change names, hardcoding them isn't all bad! ;)

If nothing else, at least you folks will get to see some of my "coding
style" and have an opportunity to disagree with my use of braces, white
space and other "religious" features!  BTW...if anyone has any questions
regarding the code, don't hesitate to ask.

Take Care.

Rob!


----------------checklist.xml----------------

<?xml version="1.0"?>
<CoPilot>
        <CheckList Name="PREFLIGHT" Identity="00000001">
        <!-- Preflight checklist prompts -->
                <Challenge Value="Aircraft Documents?">
                        <Response>Check</Response>
                </Challenge>
                <Challenge Value="Battery On?">
                        <Response>Check</Response>
                </Challenge>
                <Challenge Value="Flaps Down?">
                        <Response>Check</Response>
                </Challenge>
        </CheckList>
        <CheckList Name="TAXI" Identity="00000002">
        <!-- Taxi checklist prompts -->
                <Challenge Value="Aircraft Documents?">
                        <Response>Check</Response>
                </Challenge>
                <Challenge Value="Battery On?">
                        <Response>Check</Response>
                </Challenge>
                <Challenge Value="Flaps Down?">
                        <Response>Check</Response>
                </Challenge>
        </CheckList>
        <Response Value="Check" Invalid="0x10200FEA"/>
</CoPilot>

(Note that the last Response element can be stripped, or others changed
similarly or the "0x10200FEA" can be replaced with other branching or
prompt information.)

----------------CoPilot.c----------------

/******************************************************
 *
 *  FILENAME:   CoPilot.c
 *  AUTHOR:     Rob Wehrli
 *  DATE:       18 JULY 2001
 *  COPYRIGHT:  Copyright 2001 by Rob Wehrli
 *  LICENSE:    GPL v2.0 or later
 *  PURPOSE:    Implementation of CoPilot Application
 *
 ******************************************************/

#ifndef _CoPilot_h_
#include "CoPilot.h"
#endif /* _CoPilot_h_ */

/*
 * Globals
 *
 * File-scope state shared between main() and the expat callbacks
 * defined in this file.
 */
XML_Parser g_parser   = NULL;  /* parser handle; created and freed in main() */
FILE *g_InFile        = NULL;  /* XML input stream, opened from argv[1] in main() */
FILE *g_OutFile       = NULL;  /* output/log stream, opened from argv[2] in main() */

/******************************************************
 *
 *  StartElement()
 *
 *  Start-tag callback registered with the parser in
 *  main().  Prints the element name for the four element
 *  types this program recognizes (CoPilot, CheckList,
 *  Challenge, Response).  For every recognized element
 *  except CoPilot, a non-zero specified-attribute count
 *  is passed on to ShowAttributes() together with atts.
 *  Any other element name is ignored.
 *
 *  userData  - user-data pointer supplied by the parser
 *              (not used here)
 *  name      - name of the element being opened
 *  atts      - attribute array supplied by the parser
 *
 ******************************************************/
void StartElement( void *userData, const char *name, const char **atts )
{
  const int specified = XML_GetSpecifiedAttributeCount( g_parser );
  int dumpAttributes = 1;

  if( !strcmp( name, "CoPilot" ) )
  {
    /* The root element is only announced; its attributes are not shown. */
    printf( "CoPilot\n" );
    dumpAttributes = 0;
  }
  else if( !strcmp( name, "CheckList" ) )
  {
    printf( "CheckList\n" );
  }
  else if( !strcmp( name, "Challenge" ) )
  {
    printf( "Challenge\n" );
    /* Not implemented:
       XML_SetCharacterDataHandler( g_parser, ChallengeDataHandler ); */
  }
  else if( !strcmp( name, "Response" ) )
  {
    printf( "Response\n" );
    /* Not implemented:
       XML_SetCharacterDataHandler( g_parser, ResponseDataHandler ); */
  }
  else
  {
    /* Unrecognized element: nothing to report. */
    return;
  }

  if( dumpAttributes && specified != 0 )
  {
    ShowAttributes( specified, atts );
  }
}

/******************************************************
 *
 *  EndElement()
 *
 *  End-tag callback registered with the parser in main().
 *  No work is performed when an element closes.
 *
 *  userData  - user-data pointer supplied by the parser
 *  name      - name of the element being closed
 *
 ******************************************************/
void EndElement( void *userData, const char *name )
{
  /* Both arguments are deliberately left untouched. */
  (void)userData;
  (void)name;
}

/******************************************************
 *
 *  ElementData()
 *
 *  Default character-data callback (installed in main()).
 *  Makes a NUL-terminated copy of the text chunk and then
 *  releases it again; the calls that would actually emit
 *  the text are left commented out as extension points.
 *
 *  userData  - user-data pointer supplied by the parser
 *              (not used here)
 *  s         - text chunk; NOT NUL-terminated
 *  len       - number of characters available at s
 *
 *  Silently does nothing when s is NULL, len is negative,
 *  or the temporary buffer cannot be allocated.
 *
 ******************************************************/
void ElementData( void *userData, const XML_Char *s, int len )
{
  char *str;

  (void)userData;

  if( s == NULL || len < 0 )
    return;

  str = malloc( (size_t)len + 1 );
  if( str == NULL )
    return;   /* out of memory: skip this chunk instead of writing through NULL */

  memcpy( str, s, (size_t)len );
  str[len] = '\0';

  /* printf( "%s\n", str ); */
  /* fputs( str, g_OutFile ); */   /* never pass str as a format string */
  free( str );
}

/******************************************************
 *
 *  DataHandler()
 *
 *  Character-data callback that writes one labelled
 *  "Data:" line, terminated with CRLF, to g_OutFile and
 *  then re-installs ElementData() as the parser's
 *  character-data handler.  Nothing in this file
 *  registers it.
 *
 *  userData  - user-data pointer supplied by the parser
 *              (not used here)
 *  s         - text chunk; NOT NUL-terminated
 *  len       - number of characters available at s
 *
 ******************************************************/

void DataHandler( void *userData, const XML_Char *s, int len )
{
  (void)userData;

  fprintf( g_OutFile, "\t        Data:  " );

  /* The text comes straight from the XML document, so it must never be
     used as the format string itself.  "%.*s" prints at most len
     characters, which also removes the need for a temporary
     NUL-terminated copy (and its unchecked malloc). */
  if( s != NULL && len > 0 )
    fprintf( g_OutFile, "%.*s", len, s );

  fprintf( g_OutFile, CRLF );

  XML_SetCharacterDataHandler( g_parser, ElementData );
}

/******************************************************
 *
 *  ShowAttributes()
 *
 *  Prints the first iAttributes entries of atts to
 *  stdout, one per line, each prefixed with
 *  "Attribute: ".  Nothing is printed when iAttributes
 *  is zero or negative.
 *
 *  iAttributes  - number of entries of atts to print
 *  atts         - array of strings to print
 *
 ******************************************************/
void ShowAttributes( int iAttributes, const char **atts )
{
  const char **entry = atts;
  int remaining;

  for( remaining = iAttributes; remaining > 0; remaining-- )
  {
    printf( "Attribute: %s\n", *entry );
    entry++;
  }
}

/******************************************************
 *
 *  Usage()
 *
 *  Writes the one-line command-line synopsis to stdout.
 *
 *  Returns -1 so that main() can hand the result straight
 *  back as its own return value.
 *
 ******************************************************/
int Usage()
{
  static const char synopsis[] = "Usage: CoPilot infile.xml outfile.log\n";

  fputs( synopsis, stdout );
  return -1;
}

/******************************************************
 *
 *  main()
 *
 *  Usage: CoPilot infile.xml outfile.log
 *
 *  Opens the input and output files, creates the parser,
 *  registers StartElement()/EndElement()/ElementData()
 *  and feeds the input to the parser in BUFSIZ-sized
 *  chunks.  A parse error is reported to the output file
 *  together with the line number it occurred on.
 *
 *  Returns:
 *     0  the whole input was parsed
 *     1  parse error, read error, or the output file
 *        could not be closed cleanly
 *    -1  bad command line, a file could not be opened,
 *        or the parser could not be created
 *
 *  Every exit path after the first fopen() goes through
 *  the cleanup label so that the parser and both streams
 *  are always released.
 *
 ******************************************************/
int main( int argc, char *argv[] )
{
  char buf[BUFSIZ];
  int done  = 0;
  int depth = 0;
  int rc    = 0;

  if( argc < 3 )
    return Usage();

  g_InFile = fopen( argv[1], "r" );
  if( !g_InFile )
  {
    perror( argv[1] );
    rc = -1;
    goto cleanup;
  }

  /* Only create/truncate the output once the input is known to be readable. */
  g_OutFile = fopen( argv[2], "w" );
  if( !g_OutFile )
  {
    perror( argv[2] );
    rc = -1;
    goto cleanup;
  }

  g_parser = XML_ParserCreate( NULL );
  if( !g_parser )
  {
    fprintf( stderr, "CoPilot: unable to create XML parser\n" );
    rc = -1;
    goto cleanup;
  }

  XML_SetUserData( g_parser, &depth );
  XML_SetElementHandler( g_parser, StartElement, EndElement );
  XML_SetCharacterDataHandler( g_parser, ElementData );

  do
  {
    size_t len = fread( buf, 1, sizeof( buf ), g_InFile );

    /* A short read is either end-of-file or an I/O error; only the
       former may be presented to the parser as the final chunk. */
    if( ferror( g_InFile ) )
    {
      fprintf( g_OutFile, "read error on %s\n", argv[1] );
      rc = 1;
      break;
    }

    done = len < sizeof( buf );

    /* len never exceeds BUFSIZ, so the narrowing to int is safe. */
    if( !XML_Parse( g_parser, buf, (int)len, done ) )
    {
      /* The cast keeps "%d" correct whatever integer type the parser
         library uses for line numbers. */
      fprintf( g_OutFile, "%s at line %d\n",
        XML_ErrorString( XML_GetErrorCode( g_parser ) ),
        (int)XML_GetCurrentLineNumber( g_parser ) );
      rc = 1;
      break;
    }
  } while( !done );

cleanup:
  if( g_parser )
  {
    XML_ParserFree( g_parser );
    g_parser = NULL;
  }
  if( g_InFile )
  {
    fclose( g_InFile );
    g_InFile = NULL;
  }
  if( g_OutFile )
  {
    /* fclose() flushes; a failure here means the log is incomplete. */
    if( fclose( g_OutFile ) != 0 && rc == 0 )
      rc = 1;
    g_OutFile = NULL;
  }
  return rc;
}

----------------CoPilot.h----------------

/******************************************************
 *
 *  FILENAME:   CoPilot.h
 *  AUTHOR:     Rob Wehrli
 *  DATE:       18 JULY 2001
 *  COPYRIGHT:  Copyright 2001 by Rob Wehrli
 *  LICENSE:    GPL v2.0 or later
 *  PURPOSE:    Definitions for CoPilot Application 
 *
 ******************************************************/

#ifndef _CoPilot_h_
#define _CoPilot_h_

#include <stdio.h>
#include <stdlib.h>
#include <string.h>   

/*
    Expat Freeware C XML Parser
    See include file for open licensing agreement
*/
#include <xmlparse.h>

/* Markup delimiter strings. */
#define TAG_SLASH "/"
#define TAG_GT    ">"
#define TAG_LT    "<"
 
/* Line terminator written to the output file by DataHandler(). */
#define CRLF    "\r\n"

/*
  Function Declarations

  NOTE(review): DataHandler() is defined in CoPilot.c but has no
  declaration here.
*/

/* Parser callbacks; main() registers these three with the parser. */

/* Start-tag callback: prints the element name and, where applicable,
   its attributes. */
void StartElement( void *userData, const char *name, const char **atts
);
/* Character-data callback: receives len characters at s (no terminating
   NUL is assumed). */
void ElementData( void *userData, const XML_Char *s, int len );
/* End-tag callback: currently does nothing. */
void EndElement( void *userData, const char *name );

/* Prints the first iAttributes entries of atts to stdout, one per line. */
void ShowAttributes( int iAttributes, const char **atts );
 
/* Prints the command-line synopsis and returns -1. */
int Usage();
/* Program entry point; expects the input and output file names as
   argv[1] and argv[2]. */
int main( int argc, char *argv[] ); 


#endif /* _CoPilot_h_ */


----------------output----------------

CoPilot
CheckList
Attribute: Name
Attribute: PREFLIGHT
Attribute: Identity
Attribute: 00000001
Challenge
Attribute: Value
Attribute: Aircraft Documents?
Response
Challenge
Attribute: Value
Attribute: Battery On?
Response
Challenge
Attribute: Value
Attribute: Flaps Down?
Response
CheckList
Attribute: Name
Attribute: TAXI
Attribute: Identity
Attribute: 00000002
Challenge
Attribute: Value
Attribute: Aircraft Documents?
Response
Challenge
Attribute: Value
Attribute: Battery On?
Response
Challenge
Attribute: Value
Attribute: Flaps Down?
Response
Response
Attribute: Value
Attribute: Check
Attribute: Invalid
Attribute: 0x10200FEA

----------------Makefile----------------

###############################################
# FILENAME:   Makefile
# AUTHOR:     Rob Wehrli
# COPYRIGHT:  Copyright 2001 by Rob Wehrli
# LICENSE:    GPL v2.0 or later
# PURPOSE:    Makefile for CoPilot
###############################################

# Compiler and flags (-g: build with debugging information)
CC        = gcc
CFLAGS    = -g

# change to your expat install location
EXPAT     = ../../expat/

# Header search path, library search path and library to link against
INCLUDE   = -I$(EXPAT)include/
LIBPATH   = -L$(EXPAT)lib/
LIBRARY   = -lexpat

# Name of the program; also the base name of its .c and .h files
EXE       = CoPilot

# 'all' and 'clean' do not name files.  Declaring them phony keeps them
# working even if a file called "all" or "clean" appears in this directory.
.PHONY: all clean

# Default target.  Listing 'clean' first forces a full rebuild every time.
all: clean $(EXE)

# Compile and link in a single step.
$(EXE): $(EXE).c $(EXE).h
	$(CC) $(CFLAGS) $(INCLUDE) -o $(EXE) $(EXE).c $(LIBPATH) $(LIBRARY)

# Remove build products.
clean:
	rm -f $(EXE) $(EXE).o