XML Stream Parser. More...
#include <rxmlparser.h>
Data Structures | |
class | HTMLTag |
HTML Tag. | |
Public Types | |
enum | SectionType { Header, StyleSheet, DOCTYPE, Body } |
enum | HeaderAttributeType { Encoding, Version, Unknown } |
Public Types inherited from RTextFile | |
enum | RemType { NoComment, SingleLineComment, MultiLineComment, SingleMultiLineComment } |
enum | ParseSpaceType { LeaveSpaces, SkipAllSpaces } |
Public Member Functions | |
RXMLParser (void) | |
RXMLParser (const RURI &uri, const RCString &encoding="UTF-8") | |
RXMLParser (RIOFile &file, const RCString &encoding="UTF-8") | |
void | StopAnalysis (void) |
void | SetAvoidSpaces (bool as) |
size_t | GetCurrentDepth (void) const |
size_t | GetLastTokenPos (void) const |
void | SetHTMLMode (bool html) |
virtual void | Open (RIO::ModeType mode=RIO::Read) |
void | Open (const RURI &uri, RIO::ModeType mode=RIO::Read, const RCString &encoding="UTF-8") |
SectionType | GetSection (void) const |
HeaderAttributeType | GetHeaderAttribute (void) const |
bool | AcceptHTMLCodes (void) |
void | SetAcceptHTMLCodes (bool accepted) |
virtual | ~RXMLParser (void) |
Stream Input Methods | |
These methods are automatically called when a stream is read from a file. | |
void | InitParser (void) |
void | SetSection (SectionType section) |
void | ResetDepth (void) |
bool | MustAvoidSpaces (void) const |
bool | HasFoundClosingHTML (void) const |
RString | GetDocType (void) const |
virtual void | SetDocType (const RString &docType) |
virtual void | AddEntity (const RString &name, const RString &value) |
virtual void | HeaderAttribute (const RString &namespaceURI, const RString &lName, const RString &name) |
virtual void | HeaderValue (const RString &value) |
virtual void | BeginTag (const RString &namespaceURI, const RString &lName, const RString &name) |
virtual void | BeginTagParsed (const RString &namespaceURI, const RString &lName, const RString &name) |
virtual void | ResolveNamespace (const RString &namespaceURI) |
virtual void | AddAttribute (const RString &namespaceURI, const RString &lName, const RString &name) |
virtual void | Value (const RString &value) |
virtual void | EndTag (const RString &namespaceURI, const RString &lName, const RString &name) |
virtual void | Text (const RString &text) |
virtual void | SetDTD (const RString &dtd) |
virtual bool | OnlyQuote (void) |
Public Member Functions inherited from RTextFile | |
RTextFile (void) | |
RTextFile (const RURI &uri, const RCString &encoding="Latin1") | |
RTextFile (RIOFile &file, const RCString &encoding="Latin1") | |
void | Open (const RURI &uri, RIO::ModeType mode=RIO::Read, const RCString &encoding="Latin1") |
virtual void | Close (void) |
virtual void | Seek (off_t pos) |
virtual void | SeekRel (off_t pos) |
RChar | GetNextChar (void) const |
RChar | GetChar (void) |
const RChar | GetCur (void) const |
RString | GetChars (size_t size) |
RString | GetUntilEnd (void) |
void | SkipEol (void) |
bool | CurString (const RString &str, bool CaseSensitive=true, bool skip=true) |
void | SkipComments (void) |
void | SkipSpaces (void) |
size_t | SkipCountSpaces (RChar car) |
virtual void | SetEncoding (const RCString &name) |
RCString | GetEncoding (void) const |
void | SetRemStyle (RemType style) |
void | SetParseSpace (ParseSpaceType parse) |
ParseSpaceType | GetParseSpace (void) const |
void | SetRem (const RString &c) |
void | SetRem (const RString &b, const RString &e) |
bool | MustAddSeparator (void) const |
void | SetAddSeparator (bool add) |
RString | GetWord (void) |
RString | GetToken (const RString &endingchar) |
RString | GetTokenString (const RString &endingstr) |
RString | GetLine (bool skipempty=true) |
long | GetInt (void) |
unsigned long | GetUInt (void) |
RTextFile & | operator>> (RString &str) |
RTextFile & | operator>> (char &nb) |
RTextFile & | operator>> (unsigned char &nb) |
RTextFile & | operator>> (short &nb) |
RTextFile & | operator>> (unsigned short &nb) |
RTextFile & | operator>> (int &nb) |
RTextFile & | operator>> (unsigned int &nb) |
RTextFile & | operator>> (long &nb) |
RTextFile & | operator>> (unsigned long &nb) |
RTextFile & | operator>> (float &nb) |
RTextFile & | operator>> (double &nb) |
RTextFile & | operator>> (long double &nb) |
void | WriteLine (void) |
void | WriteStr (const RString &str, bool invalid=false) |
void | WriteStr (const char *c, bool invalid=false) |
void | WriteStr (const char *c, size_t l, bool invalid=false) |
RTextFile & | operator<< (const char *c) |
RTextFile & | operator<< (const RString &str) |
void | WriteLong (const long nb) |
RTextFile & | operator<< (const char nb) |
RTextFile & | operator<< (const short nb) |
RTextFile & | operator<< (const int nb) |
RTextFile & | operator<< (const long nb) |
void | WriteULong (const unsigned long nb) |
RTextFile & | operator<< (const unsigned char nb) |
RTextFile & | operator<< (const unsigned int nb) |
RTextFile & | operator<< (const unsigned long nb) |
void | WriteBool (const bool b) |
RTextFile & | operator<< (const bool b) |
void | WriteChar (const char c) |
void | WriteFloat (const float nb) |
RTextFile & | operator<< (const float nb) |
void | WriteDouble (const double nb) |
RTextFile & | operator<< (const double nb) |
void | WriteLongDouble (const long double nb) |
RTextFile & | operator<< (const long double nb) |
void | WriteTime (void) |
void | WriteLog (const RString &entry, bool invalid) |
unsigned long | GetLineNb (void) const |
unsigned long | GetLastLine (void) const |
void | SetSeparator (const RString &str) |
void | SetSeparator (const char *str) |
virtual | ~RTextFile (void) |
Public Member Functions inherited from RIOFile | |
RIOFile (void) | |
RIOFile (const RURI &uri) | |
RIOFile (RIOFile &file) | |
RURI | GetRealName (void) const |
void | Open (const RURI &uri, RIO::ModeType mode) |
bool | IsOpen (void) const |
size_t | Read (char *buffer, size_t nb, bool move=true) |
void | Write (const char *buffer, size_t nb) |
virtual void | SeekToEnd (void) |
void | Truncate (off_t newsize) |
bool | End (void) const |
off_t | GetSize (void) const |
off_t | GetPos (void) const |
virtual | ~RIOFile (void) |
Public Member Functions inherited from RFile | |
RFile (void) | |
RFile (const RURI &uri) | |
RFile (const RFile &file) | |
void | Open (const RURI &uri, RIO::ModeType mode) |
int | Compare (const RFile &file) const |
int | Compare (const RFile *file) const |
int | Compare (const RString &uri) const |
const RURI & | GetURI (void) const |
void | SetURI (const RURI &uri) |
const RString | GetFileName (void) const |
virtual | ~RFile (void) |
Static Public Member Functions | |
static RChar | CodeToChar (const RString &code, bool html) |
static RString | CharToCode (RChar car, bool strict=true) |
static RString | XMLToString (const RString &str, bool html) |
static RString | StringToXML (const RString &str, bool strict=true) |
Static Public Member Functions inherited from RTextFile | |
static bool | Eol (RChar car) |
Static Public Member Functions inherited from RFile | |
static RChar | GetDirSeparator (void) |
static void | RemoveFile (const RURI &uri) |
static void | RenameFile (const RURI &olduri, const RURI &newuri) |
static RURI | GetTempFile (void) |
static bool | Exists (const RURI &uri) |
static bool | IsDir (const RURI &uri) |
Protected Member Functions | |
RString | XMLToString (const RString &str) |
const HTMLTag * | GetCurHTMLTag (void) const |
const HTMLTag * | GetHTMLTag (const RString &name) const |
bool | IsCurTagClosing (void) const |
Protected Member Functions inherited from RTextFile | |
void | Next (void) |
RString | GetRealNb (void) |
void | WriteSeparator (void) |
Private Member Functions | |
void | InitValidTags (void) |
void | LoadHeader (void) |
void | LoadNextTag (void) |
void | LoadAttributes (bool &popdefault, RContainer< Namespace, false, false > &popuri, RChar EndTag1='/', RChar EndTag2='>') |
void | LoadHeaderAttribute (const RString &namespaceURI, const RString &lName, const RString &name) |
void | LoadHeaderValue (const RString &value) |
Private Attributes | |
RString | DocType |
bool | CurTagClosing |
RContainer< Namespace, true, true > | Namespaces |
RStack< RString, true, true, true > | DefaultNamespace |
RContainer< Attribute, true, false > | Attributes |
bool | AvoidSpaces |
size_t | CurDepth |
size_t | LastTokenPos |
SectionType | Section |
HeaderAttributeType | CurHeaderAttribute |
bool | HTMLCodes |
bool | HTMLMode |
bool | FoundClosingHTML |
HTMLTag * | CurHTMLTag |
bool | Break |
Static Private Attributes | |
static R::RContainer< HTMLTag, true, true > | Tags |
Additional Inherited Members | |
Protected Attributes inherited from RIOFile | |
bool | CanWrite |
bool | CanRead |
Protected Attributes inherited from RFile | |
RIO::ModeType | Mode |
RURI | URI |
Detailed Description
XML Stream Parser.
This class represents a parser for an XML stream read from a file. By default, it does nothing. It should be inherited by child classes that actually process the parsed information (tags, attributes and content).
RXMLFile is an example of a child class that fills an RXMLStruct from an XML file.
Member Enumeration Documentation
enum SectionType |
enum HeaderAttributeType |
Constructor & Destructor Documentation
RXMLParser | ( | void | ) |
Default constructor.
RXMLParser | ( | const RURI & | uri, |
const RCString & | encoding = "UTF-8" |
||
) |
Construct an XML file. If the pointer to the XML structure is null, a default structure is created and destroyed when the file is closed.
- Parameters
-
uri URI of the file. encoding The encoding scheme of the file.
RXMLParser | ( | RIOFile & | file, |
const RCString & | encoding = "UTF-8" |
||
) |
Construct a XML file.
- Parameters
-
file A generic input/output file that should be treated as XML file. encoding The encoding scheme of the file.
|
virtual |
Destruct the XML file.
Member Function Documentation
|
private |
This method creates all the tags valid for the HTML version supported.
void StopAnalysis | ( | void | ) |
Specify that the analysis should be stopped.
void SetAvoidSpaces | ( | bool | as | ) |
Avoid spaces when a XML file is created.
size_t GetCurrentDepth | ( | void | ) | const |
Get the current depth of the XML tree parsed.
size_t GetLastTokenPos | ( | void | ) | const |
Get the position of the last token extracted.
void SetHTMLMode | ( | bool | html | ) |
Set the HTML mode.
- Parameters
-
html HTML mode active ?
|
virtual |
void Open | ( | const RURI & | uri, |
RIO::ModeType | mode = RIO::Read , |
||
const RCString & | encoding = "UTF-8" |
||
) |
Open the file.
- Parameters
-
uri URI of the file. mode The open mode for the file. encoding The encoding scheme of the file.
SectionType GetSection | ( | void | ) | const |
Get the current section treated.
HeaderAttributeType GetHeaderAttribute | ( | void | ) | const |
Get the current header attribute treated.
This function transforms a given string that is supposed to represent a character. For example, the code "quot" is a quote.
- Parameters
-
code Code. html HTML codes accepted ?
- Returns
- A RChar corresponding to the code or 0 if the code is not a valid one.
This function transforms a given character (ex: <) into a string that represents an HTML code (ex: "lt").
- Parameters
-
car Character. strict If strict is true, the quotes are also transformed; otherwise only the characters '<' and '>' are transformed.
- Returns
- A RString corresponding to the character or the character itself if a code is not identified.
This function transforms a string containing some XML or HTML code into a string with normal characters.
- Parameters
-
str XML string. html HTML codes accepted ?
- Returns
- A RString containing a normal string.
- Exceptions
-
A RException exception is generated if the string contains an invalid XML or HTML code.
This function transforms a string containing some XML or HTML code into a string with normal characters.
- Parameters
-
str XML string.
- Returns
- A RString containing a normal string.
- Exceptions
-
A RIOException exception is generated if the string contains an invalid XML or HTML code.
This function transforms a normal string into a valid XML string where some characters are replaced by codes.
- Parameters
-
str Normal string. strict If strict is true, the quotes are also transformed; otherwise only < and > are transformed.
- Returns
- A RString containing a valid XML string.
|
protected |
- Returns
- the current HTML Tag (if any).
|
protected |
- Returns
- a pointer to a given tag.
- Parameters
-
name Name of the tag.
|
protected |
- Returns
- true if the current tag is a closing one.
|
private |
Load the Header of a XML file (or nothing if it seems to be a HTML file).
|
private |
Load the next XML tag from a XML file.
|
private |
Load the attributes of the current tag and put them in a container. By default, the tag is supposed to be a normal XML tag ending with either '/>' or '>'.
- Parameters
-
popdefault A default namespace is defined for this tag. popuri Namespaces with prefixes defined for this tag. EndTag1 Character that can delimit the tag. EndTag2 Another character that can delimit the tag.
|
private |
Method called each time an attribute will be treated when reading the XML header. Actually, it just catches the "encoding" attribute.
- Parameters
-
namespaceURI Namespace (if any). lName Local name of the attribute. name Complete name of the attribute.
|
private |
Method called each time some attribute value elements (words or spaces) are parsed when reading the XML header. Actually, it only sets the encoding of the file using the value associated to the "encoding" attribute.
- Parameters
-
value Value processed.
void InitParser | ( | void | ) |
Initialize the parser.
void SetSection | ( | SectionType | section | ) |
Set the section type. It is necessary to specify where the XML stream is.
- Parameters
-
section Type of the section.
void ResetDepth | ( | void | ) |
Reset the depth. Each time the XML stream is at the top of the XML structure, this method must be called.
bool MustAvoidSpaces | ( | void | ) | const |
- Returns
- true if the spaces must be avoided.
bool HasFoundClosingHTML | ( | void | ) | const |
- Returns
- true if the closing "</html>" tag was found.
RString GetDocType | ( | void | ) | const |
- Returns
- the document type of the XML document.
|
virtual |
Set the document type of the XML document.
- Parameters
-
docType Name of the type.
Reimplemented in RHTMLFile.
Add an entity found.
- Parameters
-
name Name of the entity. value Corresponding value.
Reimplemented in RXMLFile.
|
virtual |
Method called each time an attribute will be treated when reading the XML header.
- Parameters
-
namespaceURI Namespace (if any). lName Local name of the attribute. name Complete name of the attribute.
|
virtual |
Method called each time some attribute value elements (words or spaces) are parsed when reading the XML header.
- Parameters
-
value Value processed.
Reimplemented in RXMLFile.
|
virtual |
Method called each time a beginning tag is parsed (after the parsing of the attributes).
- Parameters
-
namespaceURI Namespace (if any). lName Local name of the tag. name Complete name of the tag.
|
virtual |
Method called each time a tag defines an unknown namespace which is resolved by one of its attributes.
- Parameters
-
namespaceURI Namespace to assign to the current tag.
Reimplemented in RXMLFile.
|
virtual |
Method called each time an attribute will be treated when reading a XML file.
- Parameters
-
namespaceURI Namespace (if any). lName Local name of the attribute. name Complete name of the attribute.
Reimplemented in RXMLFile.
|
virtual |
Method called each time some attribute value elements (words or spaces) are parsed when reading a XML file.
- Parameters
-
value Value processed.
Reimplemented in RXMLFile.
|
virtual |
|
virtual |
|
virtual |
Method that specifies if only quotes are allowed to delimit a parameter in a tag. By default, this function returns true, which is the syntax of XML.
Reimplemented in RHTMLFile.
bool AcceptHTMLCodes | ( | void | ) |
Method that specifies if invalid XML codes (sequences beginning with a '&' and finishing with a ';') are accepted. By default, this function returns false, which is the syntax of XML. This can be changed with the method 'SetAcceptHTMLCodes'.
void SetAcceptHTMLCodes | ( | bool | accepted | ) |
Specify if invalid XML codes should be accepted.
- Parameters
-
accepted Yes/No.
Field Documentation
|
staticprivate |
HTML Tags.
|
private |
Type of the document as defined in the XML file <!DOCTYPE >. If the tag is omitted, the string is empty.
|
private |
Determine if the current tag is a closing one.
|
private |
Namespaces defined in the XML file.
|
private |
Current attributes treated.
|
private |
Avoid spaces in the XML file when creating it.
|
private |
Current Depth.
|
private |
Position of the last "token" extracted.
|
private |
Determine what is currently treated.
|
private |
Determine which header attribute is actually treated.
|
private |
Specify if HTML codes are accepted.
|
private |
HTMLMode active.
|
private |
Is the "</html>" found?
|
private |
Pointer to the current HTML tag.
|
private |
Define if the analysis must be stopped.