mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
473 lines
12 KiB
Plaintext
473 lines
12 KiB
Plaintext
|
// file : xml/parser -*- C++ -*-
|
||
|
// copyright : Copyright (c) 2013-2017 Code Synthesis Tools CC
|
||
|
// license : MIT; see accompanying LICENSE file
|
||
|
|
||
|
#ifndef XML_PARSER
|
||
|
#define XML_PARSER
|
||
|
|
||
|
#include <xml/details/pre.hxx>
|
||
|
|
||
|
#include <map>
|
||
|
#include <vector>
|
||
|
#include <string>
|
||
|
#include <iosfwd>
|
||
|
#include <cstddef> // std::size_t
|
||
|
|
||
|
#include <xml/details/config.hxx> // STUDXML_NOTHROW_NOEXCEPT,
|
||
|
// LIBSTUDXML_EXTERNAL_EXPAT
|
||
|
|
||
|
#ifndef LIBSTUDXML_EXTERNAL_EXPAT
|
||
|
# include <xml/details/expat/expat.h>
|
||
|
#else
|
||
|
# include <expat.h>
|
||
|
#endif
|
||
|
|
||
|
// We only support UTF-8 Expat.
|
||
|
//
|
||
|
#ifdef XML_UNICODE
|
||
|
# error UTF-16 expat (XML_UNICODE defined) is not supported
|
||
|
#endif
|
||
|
|
||
|
#include <xml/forward>
|
||
|
#include <xml/qname>
|
||
|
#include <xml/content>
|
||
|
#include <xml/exception>
|
||
|
|
||
|
#include <xml/details/export.hxx>
|
||
|
|
||
|
namespace xml
|
||
|
{
|
||
|
class parsing: public exception
|
||
|
{
|
||
|
public:
|
||
|
virtual
|
||
|
~parsing () STUDXML_NOTHROW_NOEXCEPT {}
|
||
|
|
||
|
parsing (const std::string& name,
|
||
|
unsigned long long line,
|
||
|
unsigned long long column,
|
||
|
const std::string& description);
|
||
|
|
||
|
parsing (const parser& p, const std::string& description);
|
||
|
|
||
|
const std::string&
|
||
|
name () const {return name_;}
|
||
|
|
||
|
unsigned long long
|
||
|
line () const {return line_;}
|
||
|
|
||
|
unsigned long long
|
||
|
column () const {return column_;}
|
||
|
|
||
|
const std::string&
|
||
|
description () const {return description_;}
|
||
|
|
||
|
virtual const char*
|
||
|
what () const STUDXML_NOTHROW_NOEXCEPT {return what_.c_str ();}
|
||
|
|
||
|
private:
|
||
|
LIBSTUDXML_EXPORT void
|
||
|
init ();
|
||
|
|
||
|
private:
|
||
|
std::string name_;
|
||
|
unsigned long long line_;
|
||
|
unsigned long long column_;
|
||
|
std::string description_;
|
||
|
std::string what_;
|
||
|
};
|
||
|
|
||
|
class LIBSTUDXML_EXPORT parser
|
||
|
{
|
||
|
public:
|
||
|
typedef xml::qname qname_type;
|
||
|
typedef xml::content content_type;
|
||
|
|
||
|
typedef unsigned short feature_type;
|
||
|
|
||
|
// If both receive_attributes_event and receive_attributes_map are
|
||
|
// specified, then receive_attributes_event is assumed.
|
||
|
//
|
||
|
static const feature_type receive_elements = 0x0001;
|
||
|
static const feature_type receive_characters = 0x0002;
|
||
|
static const feature_type receive_attributes_map = 0x0004;
|
||
|
static const feature_type receive_attributes_event = 0x0008;
|
||
|
static const feature_type receive_namespace_decls = 0x0010;
|
||
|
|
||
|
static const feature_type receive_default = receive_elements |
|
||
|
receive_characters |
|
||
|
receive_attributes_map;
|
||
|
|
||
|
// Parse std::istream. Input name is used in diagnostics to identify
|
||
|
// the document being parsed.
|
||
|
//
|
||
|
// If stream exceptions are enabled then std::ios_base::failure
|
||
|
// exception is used to report io errors (badbit and failbit).
|
||
|
// Otherwise, those are reported as the parsing exception.
|
||
|
//
|
||
|
parser (std::istream&,
|
||
|
const std::string& input_name,
|
||
|
feature_type = receive_default);
|
||
|
|
||
|
// Parse memory buffer that contains the whole document. Input name
|
||
|
// is used in diagnostics to identify the document being parsed.
|
||
|
//
|
||
|
parser (const void* data,
|
||
|
std::size_t size,
|
||
|
const std::string& input_name,
|
||
|
feature_type = receive_default);
|
||
|
|
||
|
const std::string&
|
||
|
input_name () const {return iname_;}
|
||
|
|
||
|
~parser ();
|
||
|
|
||
|
private:
|
||
|
parser (const parser&);
|
||
|
parser& operator= (const parser&);
|
||
|
|
||
|
// Parsing events.
|
||
|
//
|
||
|
public:
|
||
|
enum event_type
|
||
|
{
|
||
|
// If adding new events, also update the stream insertion operator.
|
||
|
//
|
||
|
start_element,
|
||
|
end_element,
|
||
|
start_attribute,
|
||
|
end_attribute,
|
||
|
characters,
|
||
|
start_namespace_decl,
|
||
|
end_namespace_decl,
|
||
|
eof
|
||
|
};
|
||
|
|
||
|
event_type
|
||
|
next ();
|
||
|
|
||
|
// Get the next event and make sure that it's what's expected. If it
|
||
|
// is not, then throw an appropriate parsing exception.
|
||
|
//
|
||
|
void
|
||
|
next_expect (event_type);
|
||
|
|
||
|
void
|
||
|
next_expect (event_type, const std::string& name);
|
||
|
|
||
|
void
|
||
|
next_expect (event_type, const qname_type& qname);
|
||
|
|
||
|
void
|
||
|
next_expect (event_type, const std::string& ns, const std::string& name);
|
||
|
|
||
|
event_type
|
||
|
peek ();
|
||
|
|
||
|
// Return the even that was last returned by the call to next() or
|
||
|
// peek().
|
||
|
//
|
||
|
event_type
|
||
|
event () {return event_;}
|
||
|
|
||
|
// Event data.
|
||
|
//
|
||
|
public:
|
||
|
const qname_type& qname () const {return *pqname_;}
|
||
|
|
||
|
const std::string& namespace_ () const {return pqname_->namespace_ ();}
|
||
|
const std::string& name () const {return pqname_->name ();}
|
||
|
const std::string& prefix () const {return pqname_->prefix ();}
|
||
|
|
||
|
std::string& value () {return *pvalue_;}
|
||
|
const std::string& value () const {return *pvalue_;}
|
||
|
template <typename T> T value () const;
|
||
|
|
||
|
unsigned long long line () const {return line_;}
|
||
|
unsigned long long column () const {return column_;}
|
||
|
|
||
|
// Attribute map lookup. If attribute is not found, then the version
|
||
|
// without the default value throws an appropriate parsing exception
|
||
|
// while the version with the default value returns that value.
|
||
|
//
|
||
|
// Note also that there is no attribute(ns,name) version since it
|
||
|
// would conflict with attribute(name,dv) (qualified attributes
|
||
|
// are not very common).
|
||
|
//
|
||
|
// Attribute map is valid throughout at the "element level" until
|
||
|
// end_element and not just during start_element. As a special case,
|
||
|
// the map is still valid after peek() that returned end_element until
|
||
|
// this end_element event is retrieved with next().
|
||
|
//
|
||
|
const std::string&
|
||
|
attribute (const std::string& name) const;
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
attribute (const std::string& name) const;
|
||
|
|
||
|
std::string
|
||
|
attribute (const std::string& name,
|
||
|
const std::string& default_value) const;
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
attribute (const std::string& name, const T& default_value) const;
|
||
|
|
||
|
const std::string&
|
||
|
attribute (const qname_type& qname) const;
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
attribute (const qname_type& qname) const;
|
||
|
|
||
|
std::string
|
||
|
attribute (const qname_type& qname,
|
||
|
const std::string& default_value) const;
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
attribute (const qname_type& qname, const T& default_value) const;
|
||
|
|
||
|
bool
|
||
|
attribute_present (const std::string& name) const;
|
||
|
|
||
|
bool
|
||
|
attribute_present (const qname_type& qname) const;
|
||
|
|
||
|
// Low-level attribute map access. Note that this API assumes
|
||
|
// all attributes are handled.
|
||
|
//
|
||
|
struct attribute_value_type
|
||
|
{
|
||
|
std::string value;
|
||
|
mutable bool handled;
|
||
|
};
|
||
|
|
||
|
typedef std::map<qname_type, attribute_value_type> attribute_map_type;
|
||
|
|
||
|
const attribute_map_type&
|
||
|
attribute_map () const;
|
||
|
|
||
|
// Optional content processing.
|
||
|
//
|
||
|
public:
|
||
|
// Note that you cannot get/set content while peeking.
|
||
|
//
|
||
|
void
|
||
|
content (content_type);
|
||
|
|
||
|
content_type
|
||
|
content () const;
|
||
|
|
||
|
// Versions that also set the content. Event type must be start_element.
|
||
|
//
|
||
|
void
|
||
|
next_expect (event_type, const std::string& name, content_type);
|
||
|
|
||
|
void
|
||
|
next_expect (event_type, const qname_type& qname, content_type);
|
||
|
|
||
|
void
|
||
|
next_expect (event_type,
|
||
|
const std::string& ns, const std::string& name,
|
||
|
content_type);
|
||
|
|
||
|
// Helpers for parsing elements with simple content. The first two
|
||
|
// functions assume that start_element has already been parsed. The
|
||
|
// rest parse the complete element, from start to end.
|
||
|
//
|
||
|
// Note also that as with attribute(), there is no (namespace,name)
|
||
|
// overload since it would conflicts with (namespace,default_value).
|
||
|
//
|
||
|
public:
|
||
|
std::string
|
||
|
element ();
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
element ();
|
||
|
|
||
|
std::string
|
||
|
element (const std::string& name);
|
||
|
|
||
|
std::string
|
||
|
element (const qname_type& qname);
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
element (const std::string& name);
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
element (const qname_type& qname);
|
||
|
|
||
|
std::string
|
||
|
element (const std::string& name, const std::string& default_value);
|
||
|
|
||
|
std::string
|
||
|
element (const qname_type& qname, const std::string& default_value);
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
element (const std::string& name, const T& default_value);
|
||
|
|
||
|
template <typename T>
|
||
|
T
|
||
|
element (const qname_type& qname, const T& default_value);
|
||
|
|
||
|
// C++11 range-based for support. Generally, the iterator interface
|
||
|
// doesn't make much sense for the parser so for now we have an
|
||
|
// implementation that is just enough to the range-based for.
|
||
|
//
|
||
|
public:
|
||
|
struct iterator
|
||
|
{
|
||
|
typedef event_type value_type;
|
||
|
|
||
|
iterator (parser* p = 0, event_type e = eof): p_ (p), e_ (e) {}
|
||
|
value_type operator* () const {return e_;}
|
||
|
iterator& operator++ () {e_ = p_->next (); return *this;}
|
||
|
|
||
|
// Comparison only makes sense when comparing to end (eof).
|
||
|
//
|
||
|
bool operator== (iterator y) const {return e_ == eof && y.e_ == eof;}
|
||
|
bool operator!= (iterator y) const {return !(*this == y);}
|
||
|
|
||
|
private:
|
||
|
parser* p_;
|
||
|
event_type e_;
|
||
|
};
|
||
|
|
||
|
iterator begin () {return iterator (this, next ());}
|
||
|
iterator end () {return iterator (this, eof);}
|
||
|
|
||
|
private:
|
||
|
static void XMLCALL
|
||
|
start_element_ (void*, const XML_Char*, const XML_Char**);
|
||
|
|
||
|
static void XMLCALL
|
||
|
end_element_ (void*, const XML_Char*);
|
||
|
|
||
|
static void XMLCALL
|
||
|
characters_ (void*, const XML_Char*, int);
|
||
|
|
||
|
static void XMLCALL
|
||
|
start_namespace_decl_ (void*, const XML_Char*, const XML_Char*);
|
||
|
|
||
|
static void XMLCALL
|
||
|
end_namespace_decl_ (void*, const XML_Char*);
|
||
|
|
||
|
private:
|
||
|
void
|
||
|
init ();
|
||
|
|
||
|
event_type
|
||
|
next_ (bool peek);
|
||
|
|
||
|
event_type
|
||
|
next_body ();
|
||
|
|
||
|
void
|
||
|
handle_error ();
|
||
|
|
||
|
private:
|
||
|
// If size_ is 0, then data is std::istream. Otherwise, it is a buffer.
|
||
|
//
|
||
|
union
|
||
|
{
|
||
|
std::istream* is;
|
||
|
const void* buf;
|
||
|
} data_;
|
||
|
|
||
|
std::size_t size_;
|
||
|
|
||
|
const std::string iname_;
|
||
|
feature_type feature_;
|
||
|
|
||
|
XML_Parser p_;
|
||
|
std::size_t depth_;
|
||
|
bool accumulate_; // Whether we are accumulating character content.
|
||
|
enum {state_next, state_peek} state_;
|
||
|
event_type event_;
|
||
|
event_type queue_;
|
||
|
|
||
|
qname_type qname_;
|
||
|
std::string value_;
|
||
|
|
||
|
// These are used to avoid copying when we are handling attributes
|
||
|
// and namespace decls.
|
||
|
//
|
||
|
const qname_type* pqname_;
|
||
|
std::string* pvalue_;
|
||
|
|
||
|
unsigned long long line_;
|
||
|
unsigned long long column_;
|
||
|
|
||
|
// Attributes as events.
|
||
|
//
|
||
|
struct attribute_type
|
||
|
{
|
||
|
qname_type qname;
|
||
|
std::string value;
|
||
|
};
|
||
|
|
||
|
typedef std::vector<attribute_type> attributes;
|
||
|
|
||
|
attributes attr_;
|
||
|
attributes::size_type attr_i_; // Index of the current attribute.
|
||
|
|
||
|
// Namespace declarations.
|
||
|
//
|
||
|
typedef std::vector<qname_type> namespace_decls;
|
||
|
|
||
|
namespace_decls start_ns_;
|
||
|
namespace_decls::size_type start_ns_i_; // Index of the current decl.
|
||
|
|
||
|
namespace_decls end_ns_;
|
||
|
namespace_decls::size_type end_ns_i_; // Index of the current decl.
|
||
|
|
||
|
// Element state consisting of the content model and attribute map.
|
||
|
//
|
||
|
struct element_entry
|
||
|
{
|
||
|
element_entry (std::size_t d, content_type c = content_type::mixed)
|
||
|
: depth (d), content (c), attr_unhandled_ (0) {}
|
||
|
|
||
|
std::size_t depth;
|
||
|
content_type content;
|
||
|
attribute_map_type attr_map_;
|
||
|
mutable attribute_map_type::size_type attr_unhandled_;
|
||
|
};
|
||
|
|
||
|
typedef std::vector<element_entry> element_state;
|
||
|
std::vector<element_entry> element_state_;
|
||
|
|
||
|
// Empty attribute map to return when an element has no attributes.
|
||
|
//
|
||
|
const attribute_map_type empty_attr_map_;
|
||
|
|
||
|
// Return the element entry corresponding to the current depth, if
|
||
|
// exists, and NULL otherwise.
|
||
|
//
|
||
|
const element_entry*
|
||
|
get_element () const;
|
||
|
|
||
|
const element_entry*
|
||
|
get_element_ () const;
|
||
|
|
||
|
void
|
||
|
pop_element ();
|
||
|
};
|
||
|
|
||
|
LIBSTUDXML_EXPORT std::ostream&
|
||
|
operator<< (std::ostream&, parser::event_type);
|
||
|
}
|
||
|
|
||
|
#include <xml/parser.ixx>
|
||
|
#include <xml/parser.txx>
|
||
|
|
||
|
#include <xml/details/post.hxx>
|
||
|
|
||
|
#endif // XML_PARSER
|