tinyxmlparser.cpp

Go to the documentation of this file.
00001 /*
00002 www.sourceforge.net/projects/tinyxml
00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00004 
00005 This software is provided 'as-is', without any express or implied 
00006 warranty. In no event will the authors be held liable for any 
00007 damages arising from the use of this software.
00008 
00009 Permission is granted to anyone to use this software for any 
00010 purpose, including commercial applications, and to alter it and 
00011 redistribute it freely, subject to the following restrictions:
00012 
00013 1. The origin of this software must not be misrepresented; you must 
00014 not claim that you wrote the original software. If you use this
00015 software in a product, an acknowledgment in the product documentation
00016 would be appreciated but is not required.
00017 
00018 2. Altered source versions must be plainly marked as such, and 
00019 must not be misrepresented as being the original software.
00020 
00021 3. This notice may not be removed or altered from any source 
00022 distribution.
00023 */
00024 
00025 #include "tinyxml.h"
00026 #include <ctype.h>
00027 #include <stddef.h>
00028 
00029 //#define DEBUG_PARSER
00030 #if defined( DEBUG_PARSER )
00031 #   if defined( DEBUG ) && defined( _MSC_VER )
00032 #       include <windows.h>
00033 #       define TIXML_LOG OutputDebugString
00034 #   else
00035 #       define TIXML_LOG printf
00036 #   endif
00037 #endif
00038 
00039 // Note tha "PutString" hardcodes the same list. This
00040 // is less flexible than it appears. Changing the entries
00041 // or order will break putstring.   
00042 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
00043 {
00044     { "&amp;",  5, '&' },
00045     { "&lt;",   4, '<' },
00046     { "&gt;",   4, '>' },
00047     { "&quot;", 6, '\"' },
00048     { "&apos;", 6, '\'' }
00049 };
00050 
// A bunch of unicode info is at:
//      http://www.unicode.org/faq/utf_bom.html
// including the basis of the table below, which gives the number of bytes
// in a UTF-8 sequence as a function of its lead byte. Invalid lead bytes
// are given a length of 1 -- the result will be junk, but the input is
// passed through as far as possible.
//
// Beware of these three-byte sequences, which the parser treats as
// zero-width:
//              ef bb bf (Microsoft "lead bytes", i.e. the UTF-8 byte order mark)
//              ef bf be
//              ef bf bf

// The three bytes of the UTF-8 byte order mark, in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;

// Indexed by lead byte; yields the length in bytes (1 to 4) of the UTF-8
// sequence that starts with that byte.
const int TiXmlBase::utf8ByteTable[256] = 
{
    //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x00
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x10
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x20
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x30
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x40
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x50
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x60
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x70 End of ASCII range
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x80 0x80 to 0xc1 invalid
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x90 
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xa0 
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xb0 
        1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xc0 0xc2 to 0xdf 2 byte
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xd0
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  // 0xe0 0xe0 to 0xef 3 byte
        4,  4,  4,  4,  4,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1   // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
00085 
00086 
00087 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00088 {
00089     const unsigned long BYTE_MASK = 0xBF;
00090     const unsigned long BYTE_MARK = 0x80;
00091     const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00092 
00093     if (input < 0x80) 
00094         *length = 1;
00095     else if ( input < 0x800 )
00096         *length = 2;
00097     else if ( input < 0x10000 )
00098         *length = 3;
00099     else if ( input < 0x200000 )
00100         *length = 4;
00101     else
00102         { *length = 0; return; }    // This code won't covert this correctly anyway.
00103 
00104     output += *length;
00105 
00106     // Scary scary fall throughs.
00107     switch (*length) 
00108     {
00109         case 4:
00110             --output; 
00111             *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00112             input >>= 6;
00113         case 3:
00114             --output; 
00115             *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00116             input >>= 6;
00117         case 2:
00118             --output; 
00119             *output = (char)((input | BYTE_MARK) & BYTE_MASK); 
00120             input >>= 6;
00121         case 1:
00122             --output; 
00123             *output = (char)(input | FIRST_BYTE_MARK[*length]);
00124     }
00125 }
00126 
00127 
00128 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00129 {
00130     // This will only work for low-ascii, everything else is assumed to be a valid
00131     // letter. I'm not sure this is the best approach, but it is quite tricky trying
00132     // to figure out alhabetical vs. not across encoding. So take a very 
00133     // conservative approach.
00134 
00135 //  if ( encoding == TIXML_ENCODING_UTF8 )
00136 //  {
00137         if ( anyByte < 127 )
00138             return isalpha( anyByte );
00139         else
00140             return 1;   // What else to do? The unicode set is huge...get the english ones right.
00141 //  }
00142 //  else
00143 //  {
00144 //      return isalpha( anyByte );
00145 //  }
00146 }
00147 
00148 
00149 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00150 {
00151     // This will only work for low-ascii, everything else is assumed to be a valid
00152     // letter. I'm not sure this is the best approach, but it is quite tricky trying
00153     // to figure out alhabetical vs. not across encoding. So take a very 
00154     // conservative approach.
00155 
00156 //  if ( encoding == TIXML_ENCODING_UTF8 )
00157 //  {
00158         if ( anyByte < 127 )
00159             return isalnum( anyByte );
00160         else
00161             return 1;   // What else to do? The unicode set is huge...get the english ones right.
00162 //  }
00163 //  else
00164 //  {
00165 //      return isalnum( anyByte );
00166 //  }
00167 }
00168 
00169 
00170 class TiXmlParsingData
00171 {
00172     friend class TiXmlDocument;
00173   public:
00174     void Stamp( const char* now, TiXmlEncoding encoding );
00175 
00176     const TiXmlCursor& Cursor() { return cursor; }
00177 
00178   private:
00179     // Only used by the document!
00180     TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00181     {
00182         assert( start );
00183         stamp = start;
00184         tabsize = _tabsize;
00185         cursor.row = row;
00186         cursor.col = col;
00187     }
00188 
00189     TiXmlCursor     cursor;
00190     const char*     stamp;
00191     int             tabsize;
00192 };
00193 
00194 
00195 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00196 {
00197     assert( now );
00198 
00199     // Do nothing if the tabsize is 0.
00200     if ( tabsize < 1 )
00201     {
00202         return;
00203     }
00204 
00205     // Get the current row, column.
00206     int row = cursor.row;
00207     int col = cursor.col;
00208     const char* p = stamp;
00209     assert( p );
00210 
00211     while ( p < now )
00212     {
00213         // Treat p as unsigned, so we have a happy compiler.
00214         const unsigned char* pU = (const unsigned char*)p;
00215 
00216         // Code contributed by Fletcher Dunn: (modified by lee)
00217         switch (*pU) {
00218             case 0:
00219                 // We *should* never get here, but in case we do, don't
00220                 // advance past the terminating null character, ever
00221                 return;
00222 
00223             case '\r':
00224                 // bump down to the next line
00225                 ++row;
00226                 col = 0;                
00227                 // Eat the character
00228                 ++p;
00229 
00230                 // Check for \r\n sequence, and treat this as a single character
00231                 if (*p == '\n') {
00232                     ++p;
00233                 }
00234                 break;
00235 
00236             case '\n':
00237                 // bump down to the next line
00238                 ++row;
00239                 col = 0;
00240 
00241                 // Eat the character
00242                 ++p;
00243 
00244                 // Check for \n\r sequence, and treat this as a single
00245                 // character.  (Yes, this bizarre thing does occur still
00246                 // on some arcane platforms...)
00247                 if (*p == '\r') {
00248                     ++p;
00249                 }
00250                 break;
00251 
00252             case '\t':
00253                 // Eat the character
00254                 ++p;
00255 
00256                 // Skip to next tab stop
00257                 col = (col / tabsize + 1) * tabsize;
00258                 break;
00259 
00260             case TIXML_UTF_LEAD_0:
00261                 if ( encoding == TIXML_ENCODING_UTF8 )
00262                 {
00263                     if ( *(p+1) && *(p+2) )
00264                     {
00265                         // In these cases, don't advance the column. These are
00266                         // 0-width spaces.
00267                         if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00268                             p += 3; 
00269                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00270                             p += 3; 
00271                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00272                             p += 3; 
00273                         else
00274                             { p +=3; ++col; }   // A normal character.
00275                     }
00276                 }
00277                 else
00278                 {
00279                     ++p;
00280                     ++col;
00281                 }
00282                 break;
00283 
00284             default:
00285                 if ( encoding == TIXML_ENCODING_UTF8 )
00286                 {
00287                     // Eat the 1 to 4 byte utf8 character.
00288                     int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
00289                     if ( step == 0 )
00290                         step = 1;       // Error case from bad encoding, but handle gracefully.
00291                     p += step;
00292 
00293                     // Just advance one column, of course.
00294                     ++col;
00295                 }
00296                 else
00297                 {
00298                     ++p;
00299                     ++col;
00300                 }
00301                 break;
00302         }
00303     }
00304     cursor.row = row;
00305     cursor.col = col;
00306     assert( cursor.row >= -1 );
00307     assert( cursor.col >= -1 );
00308     stamp = p;
00309     assert( stamp );
00310 }
00311 
00312 
00313 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00314 {
00315     if ( !p || !*p )
00316     {
00317         return 0;
00318     }
00319     if ( encoding == TIXML_ENCODING_UTF8 )
00320     {
00321         while ( *p )
00322         {
00323             const unsigned char* pU = (const unsigned char*)p;
00324             
00325             // Skip the stupid Microsoft UTF-8 Byte order marks
00326             if (    *(pU+0)==TIXML_UTF_LEAD_0
00327                  && *(pU+1)==TIXML_UTF_LEAD_1 
00328                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00329             {
00330                 p += 3;
00331                 continue;
00332             }
00333             else if(*(pU+0)==TIXML_UTF_LEAD_0
00334                  && *(pU+1)==0xbfU
00335                  && *(pU+2)==0xbeU )
00336             {
00337                 p += 3;
00338                 continue;
00339             }
00340             else if(*(pU+0)==TIXML_UTF_LEAD_0
00341                  && *(pU+1)==0xbfU
00342                  && *(pU+2)==0xbfU )
00343             {
00344                 p += 3;
00345                 continue;
00346             }
00347 
00348             if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )        // Still using old rules for white space.
00349                 ++p;
00350             else
00351                 break;
00352         }
00353     }
00354     else
00355     {
00356         while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00357             ++p;
00358     }
00359 
00360     return p;
00361 }
00362 
00363 #ifdef TIXML_USE_STL
00364 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
00365 {
00366     for( ;; )
00367     {
00368         if ( !in->good() ) return false;
00369 
00370         int c = in->peek();
00371         // At this scope, we can't get to a document. So fail silently.
00372         if ( !IsWhiteSpace( c ) || c <= 0 )
00373             return true;
00374 
00375         *tag += (char) in->get();
00376     }
00377 }
00378 
00379 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
00380 {
00381     //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00382     while ( in->good() )
00383     {
00384         int c = in->peek();
00385         if ( c == character )
00386             return true;
00387         if ( c <= 0 )       // Silent failure: can't get document at this scope
00388             return false;
00389 
00390         in->get();
00391         *tag += (char) c;
00392     }
00393     return false;
00394 }
00395 #endif
00396 
00397 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00398 {
00399     *name = "";
00400     assert( p );
00401 
00402     // Names start with letters or underscores.
00403     // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00404     // algorithm is generous.
00405     //
00406     // After that, they can be letters, underscores, numbers,
00407     // hyphens, or colons. (Colons are valid ony for namespaces,
00408     // but tinyxml can't tell namespaces from names.)
00409     if (    p && *p 
00410          && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00411     {
00412         while(      p && *p
00413                 &&  (       IsAlphaNum( (unsigned char ) *p, encoding ) 
00414                          || *p == '_'
00415                          || *p == '-'
00416                          || *p == '.'
00417                          || *p == ':' ) )
00418         {
00419             (*name) += *p;
00420             ++p;
00421         }
00422         return p;
00423     }
00424     return 0;
00425 }
00426 
00427 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00428 {
00429     // Presume an entity, and pull it out.
00430     TIXML_STRING ent;
00431     int i;
00432     *length = 0;
00433 
00434     if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00435     {
00436         unsigned long ucs = 0;
00437         ptrdiff_t delta = 0;
00438         unsigned mult = 1;
00439 
00440         if ( *(p+2) == 'x' )
00441         {
00442             // Hexadecimal.
00443             if ( !*(p+3) ) return 0;
00444 
00445             const char* q = p+3;
00446             q = strchr( q, ';' );
00447 
00448             if ( !q || !*q ) return 0;
00449 
00450             delta = q-p;
00451             --q;
00452 
00453             while ( *q != 'x' )
00454             {
00455                 if ( *q >= '0' && *q <= '9' )
00456                     ucs += mult * (*q - '0');
00457                 else if ( *q >= 'a' && *q <= 'f' )
00458                     ucs += mult * (*q - 'a' + 10);
00459                 else if ( *q >= 'A' && *q <= 'F' )
00460                     ucs += mult * (*q - 'A' + 10 );
00461                 else 
00462                     return 0;
00463                 mult *= 16;
00464                 --q;
00465             }
00466         }
00467         else
00468         {
00469             // Decimal.
00470             if ( !*(p+2) ) return 0;
00471 
00472             const char* q = p+2;
00473             q = strchr( q, ';' );
00474 
00475             if ( !q || !*q ) return 0;
00476 
00477             delta = q-p;
00478             --q;
00479 
00480             while ( *q != '#' )
00481             {
00482                 if ( *q >= '0' && *q <= '9' )
00483                     ucs += mult * (*q - '0');
00484                 else 
00485                     return 0;
00486                 mult *= 10;
00487                 --q;
00488             }
00489         }
00490         if ( encoding == TIXML_ENCODING_UTF8 )
00491         {
00492             // convert the UCS to UTF-8
00493             ConvertUTF32ToUTF8( ucs, value, length );
00494         }
00495         else
00496         {
00497             *value = (char)ucs;
00498             *length = 1;
00499         }
00500         return p + delta + 1;
00501     }
00502 
00503     // Now try to match it.
00504     for( i=0; i<NUM_ENTITY; ++i )
00505     {
00506         if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00507         {
00508             assert( strlen( entity[i].str ) == entity[i].strLength );
00509             *value = entity[i].chr;
00510             *length = 1;
00511             return ( p + entity[i].strLength );
00512         }
00513     }
00514 
00515     // So it wasn't an entity, its unrecognized, or something like that.
00516     *value = *p;    // Don't put back the last one, since we return it!
00517     return p+1;
00518 }
00519 
00520 
00521 bool TiXmlBase::StringEqual( const char* p,
00522                              const char* tag,
00523                              bool ignoreCase,
00524                              TiXmlEncoding encoding )
00525 {
00526     assert( p );
00527     assert( tag );
00528     if ( !p || !*p )
00529     {
00530         assert( 0 );
00531         return false;
00532     }
00533 
00534     const char* q = p;
00535 
00536     if ( ignoreCase )
00537     {
00538         while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00539         {
00540             ++q;
00541             ++tag;
00542         }
00543 
00544         if ( *tag == 0 )
00545             return true;
00546     }
00547     else
00548     {
00549         while ( *q && *tag && *q == *tag )
00550         {
00551             ++q;
00552             ++tag;
00553         }
00554 
00555         if ( *tag == 0 )        // Have we found the end of the tag, and everything equal?
00556             return true;
00557     }
00558     return false;
00559 }
00560 
00561 const char* TiXmlBase::ReadText(    const char* p, 
00562                                     TIXML_STRING * text, 
00563                                     bool trimWhiteSpace, 
00564                                     const char* endTag, 
00565                                     bool caseInsensitive,
00566                                     TiXmlEncoding encoding )
00567 {
00568     *text = "";
00569     if (    !trimWhiteSpace         // certain tags always keep whitespace
00570          || !condenseWhiteSpace )   // if true, whitespace is always kept
00571     {
00572         // Keep all the white space.
00573         while (    p && *p
00574                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00575               )
00576         {
00577             int len;
00578             char cArr[4] = { 0, 0, 0, 0 };
00579             p = GetChar( p, cArr, &len, encoding );
00580             text->append( cArr, len );
00581         }
00582     }
00583     else
00584     {
00585         bool whitespace = false;
00586 
00587         // Remove leading white space:
00588         p = SkipWhiteSpace( p, encoding );
00589         while (    p && *p
00590                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00591         {
00592             if ( *p == '\r' || *p == '\n' )
00593             {
00594                 whitespace = true;
00595                 ++p;
00596             }
00597             else if ( IsWhiteSpace( *p ) )
00598             {
00599                 whitespace = true;
00600                 ++p;
00601             }
00602             else
00603             {
00604                 // If we've found whitespace, add it before the
00605                 // new character. Any whitespace just becomes a space.
00606                 if ( whitespace )
00607                 {
00608                     (*text) += ' ';
00609                     whitespace = false;
00610                 }
00611                 int len;
00612                 char cArr[4] = { 0, 0, 0, 0 };
00613                 p = GetChar( p, cArr, &len, encoding );
00614                 if ( len == 1 )
00615                     (*text) += cArr[0]; // more efficient
00616                 else
00617                     text->append( cArr, len );
00618             }
00619         }
00620     }
00621     return p + strlen( endTag );
00622 }
00623 
00624 #ifdef TIXML_USE_STL
00625 
00626 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
00627 {
00628     // The basic issue with a document is that we don't know what we're
00629     // streaming. Read something presumed to be a tag (and hope), then
00630     // identify it, and call the appropriate stream method on the tag.
00631     //
00632     // This "pre-streaming" will never read the closing ">" so the
00633     // sub-tag can orient itself.
00634 
00635     if ( !StreamTo( in, '<', tag ) ) 
00636     {
00637         SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00638         return;
00639     }
00640 
00641     while ( in->good() )
00642     {
00643         int tagIndex = (int) tag->length();
00644         while ( in->good() && in->peek() != '>' )
00645         {
00646             int c = in->get();
00647             if ( c <= 0 )
00648             {
00649                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00650                 break;
00651             }
00652             (*tag) += (char) c;
00653         }
00654 
00655         if ( in->good() )
00656         {
00657             // We now have something we presume to be a node of 
00658             // some sort. Identify it, and call the node to
00659             // continue streaming.
00660             TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00661 
00662             if ( node )
00663             {
00664                 node->StreamIn( in, tag );
00665                 bool isElement = node->ToElement() != 0;
00666                 delete node;
00667                 node = 0;
00668 
00669                 // If this is the root element, we're done. Parsing will be
00670                 // done by the >> operator.
00671                 if ( isElement )
00672                 {
00673                     return;
00674                 }
00675             }
00676             else
00677             {
00678                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00679                 return;
00680             }
00681         }
00682     }
00683     // We should have returned sooner.
00684     SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00685 }
00686 
00687 #endif
00688 
00689 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00690 {
00691     ClearError();
00692 
00693     // Parse away, at the document level. Since a document
00694     // contains nothing but other tags, most of what happens
00695     // here is skipping white space.
00696     if ( !p || !*p )
00697     {
00698         SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00699         return 0;
00700     }
00701 
00702     // Note that, for a document, this needs to come
00703     // before the while space skip, so that parsing
00704     // starts from the pointer we are given.
00705     location.Clear();
00706     if ( prevData )
00707     {
00708         location.row = prevData->cursor.row;
00709         location.col = prevData->cursor.col;
00710     }
00711     else
00712     {
00713         location.row = 0;
00714         location.col = 0;
00715     }
00716     TiXmlParsingData data( p, TabSize(), location.row, location.col );
00717     location = data.Cursor();
00718 
00719     if ( encoding == TIXML_ENCODING_UNKNOWN )
00720     {
00721         // Check for the Microsoft UTF-8 lead bytes.
00722         const unsigned char* pU = (const unsigned char*)p;
00723         if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00724              && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00725              && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00726         {
00727             encoding = TIXML_ENCODING_UTF8;
00728             useMicrosoftBOM = true;
00729         }
00730     }
00731 
00732     p = SkipWhiteSpace( p, encoding );
00733     if ( !p )
00734     {
00735         SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00736         return 0;
00737     }
00738 
00739     while ( p && *p )
00740     {
00741         TiXmlNode* node = Identify( p, encoding );
00742         if ( node )
00743         {
00744             p = node->Parse( p, &data, encoding );
00745             LinkEndChild( node );
00746         }
00747         else
00748         {
00749             break;
00750         }
00751 
00752         // Did we get encoding info?
00753         if (    encoding == TIXML_ENCODING_UNKNOWN
00754              && node->ToDeclaration() )
00755         {
00756             TiXmlDeclaration* dec = node->ToDeclaration();
00757             const char* enc = dec->Encoding();
00758             assert( enc );
00759 
00760             if ( *enc == 0 )
00761                 encoding = TIXML_ENCODING_UTF8;
00762             else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00763                 encoding = TIXML_ENCODING_UTF8;
00764             else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00765                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00766             else 
00767                 encoding = TIXML_ENCODING_LEGACY;
00768         }
00769 
00770         p = SkipWhiteSpace( p, encoding );
00771     }
00772 
00773     // Was this empty?
00774     if ( !firstChild ) {
00775         SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00776         return 0;
00777     }
00778 
00779     // All is well.
00780     return p;
00781 }
00782 
00783 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00784 {   
00785     // The first error in a chain is more accurate - don't set again!
00786     if ( error )
00787         return;
00788 
00789     assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00790     error   = true;
00791     errorId = err;
00792     errorDesc = errorString[ errorId ];
00793 
00794     errorLocation.Clear();
00795     if ( pError && data )
00796     {
00797         data->Stamp( pError, encoding );
00798         errorLocation = data->Cursor();
00799     }
00800 }
00801 
00802 
00803 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00804 {
00805     TiXmlNode* returnNode = 0;
00806 
00807     p = SkipWhiteSpace( p, encoding );
00808     if( !p || !*p || *p != '<' )
00809     {
00810         return 0;
00811     }
00812 
00813     TiXmlDocument* doc = GetDocument();
00814     p = SkipWhiteSpace( p, encoding );
00815 
00816     if ( !p || !*p )
00817     {
00818         return 0;
00819     }
00820 
00821     // What is this thing? 
00822     // - Elements start with a letter or underscore, but xml is reserved.
00823     // - Comments: <!--
00824     // - Decleration: <?xml
00825     // - Everthing else is unknown to tinyxml.
00826     //
00827 
00828     const char* xmlHeader = { "<?xml" };
00829     const char* commentHeader = { "<!--" };
00830     const char* dtdHeader = { "<!" };
00831     const char* cdataHeader = { "<![CDATA[" };
00832 
00833     if ( StringEqual( p, xmlHeader, true, encoding ) )
00834     {
00835         #ifdef DEBUG_PARSER
00836             TIXML_LOG( "XML parsing Declaration\n" );
00837         #endif
00838         returnNode = new TiXmlDeclaration();
00839     }
00840     else if ( StringEqual( p, commentHeader, false, encoding ) )
00841     {
00842         #ifdef DEBUG_PARSER
00843             TIXML_LOG( "XML parsing Comment\n" );
00844         #endif
00845         returnNode = new TiXmlComment();
00846     }
00847     else if ( StringEqual( p, cdataHeader, false, encoding ) )
00848     {
00849         #ifdef DEBUG_PARSER
00850             TIXML_LOG( "XML parsing CDATA\n" );
00851         #endif
00852         TiXmlText* text = new TiXmlText( "" );
00853         text->SetCDATA( true );
00854         returnNode = text;
00855     }
00856     else if ( StringEqual( p, dtdHeader, false, encoding ) )
00857     {
00858         #ifdef DEBUG_PARSER
00859             TIXML_LOG( "XML parsing Unknown(1)\n" );
00860         #endif
00861         returnNode = new TiXmlUnknown();
00862     }
00863     else if (    IsAlpha( *(p+1), encoding )
00864               || *(p+1) == '_' )
00865     {
00866         #ifdef DEBUG_PARSER
00867             TIXML_LOG( "XML parsing Element\n" );
00868         #endif
00869         returnNode = new TiXmlElement( "" );
00870     }
00871     else
00872     {
00873         #ifdef DEBUG_PARSER
00874             TIXML_LOG( "XML parsing Unknown(2)\n" );
00875         #endif
00876         returnNode = new TiXmlUnknown();
00877     }
00878 
00879     if ( returnNode )
00880     {
00881         // Set the parent, so it can report errors
00882         returnNode->parent = this;
00883     }
00884     else
00885     {
00886         if ( doc )
00887             doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00888     }
00889     return returnNode;
00890 }
00891 
00892 #ifdef TIXML_USE_STL
00893 
00894 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
00895 {
00896     // We're called with some amount of pre-parsing. That is, some of "this"
00897     // element is in "tag". Go ahead and stream to the closing ">"
00898     while( in->good() )
00899     {
00900         int c = in->get();
00901         if ( c <= 0 )
00902         {
00903             TiXmlDocument* document = GetDocument();
00904             if ( document )
00905                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00906             return;
00907         }
00908         (*tag) += (char) c ;
00909         
00910         if ( c == '>' )
00911             break;
00912     }
00913 
00914     if ( tag->length() < 3 ) return;
00915 
00916     // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
00917     // If not, identify and stream.
00918 
00919     if (    tag->at( tag->length() - 1 ) == '>' 
00920          && tag->at( tag->length() - 2 ) == '/' )
00921     {
00922         // All good!
00923         return;
00924     }
00925     else if ( tag->at( tag->length() - 1 ) == '>' )
00926     {
00927         // There is more. Could be:
00928         //      text
00929         //      closing tag
00930         //      another node.
00931         for ( ;; )
00932         {
00933             StreamWhiteSpace( in, tag );
00934 
00935             // Do we have text?
00936             if ( in->good() && in->peek() != '<' ) 
00937             {
00938                 // Yep, text.
00939                 TiXmlText text( "" );
00940                 text.StreamIn( in, tag );
00941 
00942                 // What follows text is a closing tag or another node.
00943                 // Go around again and figure it out.
00944                 continue;
00945             }
00946 
00947             // We now have either a closing tag...or another node.
00948             // We should be at a "<", regardless.
00949             if ( !in->good() ) return;
00950             assert( in->peek() == '<' );
00951             int tagIndex = (int) tag->length();
00952 
00953             bool closingTag = false;
00954             bool firstCharFound = false;
00955 
00956             for( ;; )
00957             {
00958                 if ( !in->good() )
00959                     return;
00960 
00961                 int c = in->peek();
00962                 if ( c <= 0 )
00963                 {
00964                     TiXmlDocument* document = GetDocument();
00965                     if ( document )
00966                         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00967                     return;
00968                 }
00969                 
00970                 if ( c == '>' )
00971                     break;
00972 
00973                 *tag += (char) c;
00974                 in->get();
00975 
00976                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00977                 {
00978                     firstCharFound = true;
00979                     if ( c == '/' )
00980                         closingTag = true;
00981                 }
00982             }
00983             // If it was a closing tag, then read in the closing '>' to clean up the input stream.
00984             // If it was not, the streaming will be done by the tag.
00985             if ( closingTag )
00986             {
00987                 if ( !in->good() )
00988                     return;
00989 
00990                 int c = in->get();
00991                 if ( c <= 0 )
00992                 {
00993                     TiXmlDocument* document = GetDocument();
00994                     if ( document )
00995                         document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00996                     return;
00997                 }
00998                 assert( c == '>' );
00999                 *tag += (char) c;
01000 
01001                 // We are done, once we've found our closing tag.
01002                 return;
01003             }
01004             else
01005             {
01006                 // If not a closing tag, id it, and stream.
01007                 const char* tagloc = tag->c_str() + tagIndex;
01008                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
01009                 if ( !node )
01010                     return;
01011                 node->StreamIn( in, tag );
01012                 delete node;
01013                 node = 0;
01014 
01015                 // No return: go around from the beginning: text, closing tag, or node.
01016             }
01017         }
01018     }
01019 }
01020 #endif
01021 
01022 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01023 {
01024     p = SkipWhiteSpace( p, encoding );
01025     TiXmlDocument* document = GetDocument();
01026 
01027     if ( !p || !*p )
01028     {
01029         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01030         return 0;
01031     }
01032 
01033     if ( data )
01034     {
01035         data->Stamp( p, encoding );
01036         location = data->Cursor();
01037     }
01038 
01039     if ( *p != '<' )
01040     {
01041         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01042         return 0;
01043     }
01044 
01045     p = SkipWhiteSpace( p+1, encoding );
01046 
01047     // Read the name.
01048     const char* pErr = p;
01049 
01050     p = ReadName( p, &value, encoding );
01051     if ( !p || !*p )
01052     {
01053         if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01054         return 0;
01055     }
01056 
01057     TIXML_STRING endTag ("</");
01058     endTag += value;
01059     endTag += ">";
01060 
01061     // Check for and read attributes. Also look for an empty
01062     // tag or an end tag.
01063     while ( p && *p )
01064     {
01065         pErr = p;
01066         p = SkipWhiteSpace( p, encoding );
01067         if ( !p || !*p )
01068         {
01069             if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01070             return 0;
01071         }
01072         if ( *p == '/' )
01073         {
01074             ++p;
01075             // Empty tag.
01076             if ( *p  != '>' )
01077             {
01078                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );     
01079                 return 0;
01080             }
01081             return (p+1);
01082         }
01083         else if ( *p == '>' )
01084         {
01085             // Done with attributes (if there were any.)
01086             // Read the value -- which can include other
01087             // elements -- read the end tag, and return.
01088             ++p;
01089             p = ReadValue( p, data, encoding );     // Note this is an Element method, and will set the error if one happens.
01090             if ( !p || !*p )
01091                 return 0;
01092 
01093             // We should find the end tag now
01094             if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01095             {
01096                 p += endTag.length();
01097                 return p;
01098             }
01099             else
01100             {
01101                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01102                 return 0;
01103             }
01104         }
01105         else
01106         {
01107             // Try to read an attribute:
01108             TiXmlAttribute* attrib = new TiXmlAttribute();
01109             if ( !attrib )
01110             {
01111                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01112                 return 0;
01113             }
01114 
01115             attrib->SetDocument( document );
01116             const char* pErr = p;
01117             p = attrib->Parse( p, data, encoding );
01118 
01119             if ( !p || !*p )
01120             {
01121                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01122                 delete attrib;
01123                 return 0;
01124             }
01125 
01126             // Handle the strange case of double attributes:
01127             TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01128             if ( node )
01129             {
01130                 node->SetValue( attrib->Value() );
01131                 delete attrib;
01132                 return 0;
01133             }
01134 
01135             attributeSet.Add( attrib );
01136         }
01137     }
01138     return p;
01139 }
01140 
01141 
01142 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01143 {
01144     TiXmlDocument* document = GetDocument();
01145 
01146     // Read in text and elements in any order.
01147     const char* pWithWhiteSpace = p;
01148     p = SkipWhiteSpace( p, encoding );
01149 
01150     while ( p && *p )
01151     {
01152         if ( *p != '<' )
01153         {
01154             // Take what we have, make a text element.
01155             TiXmlText* textNode = new TiXmlText( "" );
01156 
01157             if ( !textNode )
01158             {
01159                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01160                     return 0;
01161             }
01162 
01163             if ( TiXmlBase::IsWhiteSpaceCondensed() )
01164             {
01165                 p = textNode->Parse( p, data, encoding );
01166             }
01167             else
01168             {
01169                 // Special case: we want to keep the white space
01170                 // so that leading spaces aren't removed.
01171                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01172             }
01173 
01174             if ( !textNode->Blank() )
01175                 LinkEndChild( textNode );
01176             else
01177                 delete textNode;
01178         } 
01179         else 
01180         {
01181             // We hit a '<'
01182             // Have we hit a new element or an end tag? This could also be
01183             // a TiXmlText in the "CDATA" style.
01184             if ( StringEqual( p, "</", false, encoding ) )
01185             {
01186                 return p;
01187             }
01188             else
01189             {
01190                 TiXmlNode* node = Identify( p, encoding );
01191                 if ( node )
01192                 {
01193                     p = node->Parse( p, data, encoding );
01194                     LinkEndChild( node );
01195                 }               
01196                 else
01197                 {
01198                     return 0;
01199                 }
01200             }
01201         }
01202         pWithWhiteSpace = p;
01203         p = SkipWhiteSpace( p, encoding );
01204     }
01205 
01206     if ( !p )
01207     {
01208         if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01209     }   
01210     return p;
01211 }
01212 
01213 
01214 #ifdef TIXML_USE_STL
01215 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01216 {
01217     while ( in->good() )
01218     {
01219         int c = in->get();  
01220         if ( c <= 0 )
01221         {
01222             TiXmlDocument* document = GetDocument();
01223             if ( document )
01224                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01225             return;
01226         }
01227         (*tag) += (char) c;
01228 
01229         if ( c == '>' )
01230         {
01231             // All is well.
01232             return;     
01233         }
01234     }
01235 }
01236 #endif
01237 
01238 
01239 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01240 {
01241     TiXmlDocument* document = GetDocument();
01242     p = SkipWhiteSpace( p, encoding );
01243 
01244     if ( data )
01245     {
01246         data->Stamp( p, encoding );
01247         location = data->Cursor();
01248     }
01249     if ( !p || !*p || *p != '<' )
01250     {
01251         if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01252         return 0;
01253     }
01254     ++p;
01255     value = "";
01256 
01257     while ( p && *p && *p != '>' )
01258     {
01259         value += *p;
01260         ++p;
01261     }
01262 
01263     if ( !p )
01264     {
01265         if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01266     }
01267     if ( *p == '>' )
01268         return p+1;
01269     return p;
01270 }
01271 
01272 #ifdef TIXML_USE_STL
01273 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01274 {
01275     while ( in->good() )
01276     {
01277         int c = in->get();  
01278         if ( c <= 0 )
01279         {
01280             TiXmlDocument* document = GetDocument();
01281             if ( document )
01282                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01283             return;
01284         }
01285 
01286         (*tag) += (char) c;
01287 
01288         if ( c == '>' 
01289              && tag->at( tag->length() - 2 ) == '-'
01290              && tag->at( tag->length() - 3 ) == '-' )
01291         {
01292             // All is well.
01293             return;     
01294         }
01295     }
01296 }
01297 #endif
01298 
01299 
01300 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01301 {
01302     TiXmlDocument* document = GetDocument();
01303     value = "";
01304 
01305     p = SkipWhiteSpace( p, encoding );
01306 
01307     if ( data )
01308     {
01309         data->Stamp( p, encoding );
01310         location = data->Cursor();
01311     }
01312     const char* startTag = "<!--";
01313     const char* endTag   = "-->";
01314 
01315     if ( !StringEqual( p, startTag, false, encoding ) )
01316     {
01317         document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01318         return 0;
01319     }
01320     p += strlen( startTag );
01321     p = ReadText( p, &value, false, endTag, false, encoding );
01322     return p;
01323 }
01324 
01325 
01326 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01327 {
01328     p = SkipWhiteSpace( p, encoding );
01329     if ( !p || !*p ) return 0;
01330 
01331     int tabsize = 4;
01332     if ( document )
01333         tabsize = document->TabSize();
01334 
01335     if ( data )
01336     {
01337         data->Stamp( p, encoding );
01338         location = data->Cursor();
01339     }
01340     // Read the name, the '=' and the value.
01341     const char* pErr = p;
01342     p = ReadName( p, &name, encoding );
01343     if ( !p || !*p )
01344     {
01345         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01346         return 0;
01347     }
01348     p = SkipWhiteSpace( p, encoding );
01349     if ( !p || !*p || *p != '=' )
01350     {
01351         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01352         return 0;
01353     }
01354 
01355     ++p;    // skip '='
01356     p = SkipWhiteSpace( p, encoding );
01357     if ( !p || !*p )
01358     {
01359         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01360         return 0;
01361     }
01362     
01363     const char* end;
01364 
01365     if ( *p == '\'' )
01366     {
01367         ++p;
01368         end = "\'";
01369         p = ReadText( p, &value, false, end, false, encoding );
01370     }
01371     else if ( *p == '"' )
01372     {
01373         ++p;
01374         end = "\"";
01375         p = ReadText( p, &value, false, end, false, encoding );
01376     }
01377     else
01378     {
01379         // All attribute values should be in single or double quotes.
01380         // But this is such a common error that the parser will try
01381         // its best, even without them.
01382         value = "";
01383         while (    p && *p                                      // existence
01384                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'  // whitespace
01385                 && *p != '/' && *p != '>' )                     // tag end
01386         {
01387             value += *p;
01388             ++p;
01389         }
01390     }
01391     return p;
01392 }
01393 
01394 #ifdef TIXML_USE_STL
01395 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01396 {
01397     if ( cdata )
01398     {
01399         int c = in->get();  
01400         if ( c <= 0 )
01401         {
01402             TiXmlDocument* document = GetDocument();
01403             if ( document )
01404                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01405             return;
01406         }
01407 
01408         (*tag) += (char) c;
01409 
01410         if ( c == '>' 
01411              && tag->at( tag->length() - 2 ) == ']'
01412              && tag->at( tag->length() - 3 ) == ']' )
01413         {
01414             // All is well.
01415             return;     
01416         }
01417     }
01418     else
01419     {
01420         while ( in->good() )
01421         {
01422             int c = in->peek(); 
01423             if ( c == '<' )
01424                 return;
01425             if ( c <= 0 )
01426             {
01427                 TiXmlDocument* document = GetDocument();
01428                 if ( document )
01429                     document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01430                 return;
01431             }
01432 
01433             (*tag) += (char) c;
01434             in->get();
01435         }
01436     }
01437 }
01438 #endif
01439 
01440 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01441 {
01442     value = "";
01443     TiXmlDocument* document = GetDocument();
01444 
01445     if ( data )
01446     {
01447         data->Stamp( p, encoding );
01448         location = data->Cursor();
01449     }
01450 
01451     const char* const startTag = "<![CDATA[";
01452     const char* const endTag   = "]]>";
01453 
01454     if ( cdata || StringEqual( p, startTag, false, encoding ) )
01455     {
01456         cdata = true;
01457 
01458         if ( !StringEqual( p, startTag, false, encoding ) )
01459         {
01460             document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01461             return 0;
01462         }
01463         p += strlen( startTag );
01464 
01465         // Keep all the white space, ignore the encoding, etc.
01466         while (    p && *p
01467                 && !StringEqual( p, endTag, false, encoding )
01468               )
01469         {
01470             value += *p;
01471             ++p;
01472         }
01473 
01474         TIXML_STRING dummy; 
01475         p = ReadText( p, &dummy, false, endTag, false, encoding );
01476         return p;
01477     }
01478     else
01479     {
01480         bool ignoreWhite = true;
01481 
01482         const char* end = "<";
01483         p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01484         if ( p )
01485             return p-1; // don't truncate the '<'
01486         return 0;
01487     }
01488 }
01489 
01490 #ifdef TIXML_USE_STL
01491 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01492 {
01493     while ( in->good() )
01494     {
01495         int c = in->get();
01496         if ( c <= 0 )
01497         {
01498             TiXmlDocument* document = GetDocument();
01499             if ( document )
01500                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01501             return;
01502         }
01503         (*tag) += (char) c;
01504 
01505         if ( c == '>' )
01506         {
01507             // All is well.
01508             return;
01509         }
01510     }
01511 }
01512 #endif
01513 
01514 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01515 {
01516     p = SkipWhiteSpace( p, _encoding );
01517     // Find the beginning, find the end, and look for
01518     // the stuff in-between.
01519     TiXmlDocument* document = GetDocument();
01520     if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01521     {
01522         if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01523         return 0;
01524     }
01525     if ( data )
01526     {
01527         data->Stamp( p, _encoding );
01528         location = data->Cursor();
01529     }
01530     p += 5;
01531 
01532     version = "";
01533     encoding = "";
01534     standalone = "";
01535 
01536     while ( p && *p )
01537     {
01538         if ( *p == '>' )
01539         {
01540             ++p;
01541             return p;
01542         }
01543 
01544         p = SkipWhiteSpace( p, _encoding );
01545         if ( StringEqual( p, "version", true, _encoding ) )
01546         {
01547             TiXmlAttribute attrib;
01548             p = attrib.Parse( p, data, _encoding );     
01549             version = attrib.Value();
01550         }
01551         else if ( StringEqual( p, "encoding", true, _encoding ) )
01552         {
01553             TiXmlAttribute attrib;
01554             p = attrib.Parse( p, data, _encoding );     
01555             encoding = attrib.Value();
01556         }
01557         else if ( StringEqual( p, "standalone", true, _encoding ) )
01558         {
01559             TiXmlAttribute attrib;
01560             p = attrib.Parse( p, data, _encoding );     
01561             standalone = attrib.Value();
01562         }
01563         else
01564         {
01565             // Read over whatever it is.
01566             while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01567                 ++p;
01568         }
01569     }
01570     return 0;
01571 }
01572 
01573 bool TiXmlText::Blank() const
01574 {
01575     for ( unsigned i=0; i<value.length(); i++ )
01576         if ( !IsWhiteSpace( value[i] ) )
01577             return false;
01578     return true;
01579 }
01580 

Generated on Wed May 23 06:00:15 2012 for cpp by  doxygen 1.5.6