AnimatLab  2
Test
MarkupSTL.cpp
1 // MarkupSTL.cpp: implementation of the CMarkupSTL class.
2 //
3 // Markup Release 6.3
4 // Copyright (C) 1999-2002 First Objective Software, Inc. All rights reserved
5 // Go to www.firstobject.com for the latest CMarkup and EDOM documentation
6 // Use in commercial applications requires written permission
7 // This software is provided "as is", with no warranty.
8 
9 #include "StdAfx.h"
10 
11 using namespace std;
12 
13 #ifdef _DEBUG
14 #undef THIS_FILE
15 static char THIS_FILE[]=__FILE__;
16 #define new DEBUG_NEW
17 #endif
18 
19 namespace StdUtils
20 {
21 
22 void CMarkupSTL::operator=( const CMarkupSTL& markup )
23 {
24  m_iPosParent = markup.m_iPosParent;
25  m_iPos = markup.m_iPos;
26  m_iPosChild = markup.m_iPosChild;
27  m_iPosFree = markup.m_iPosFree;
28  m_nNodeType = markup.m_nNodeType;
29  m_aPos = markup.m_aPos;
30  m_strDoc = markup.m_strDoc;
31  MARKUP_SETDEBUGSTATE;
32 }
33 
34 bool CMarkupSTL::SetDoc( const char* szDoc )
35 {
36  // Reset indexes
37  m_iPosFree = 1;
38  ResetPos();
39  m_mapSavedPos.clear();
40 
41  // Set document text
42  if ( szDoc )
43  m_strDoc = szDoc;
44  else
45  m_strDoc.erase();
46 
47  // Starting size of position array: 1 element per 64 bytes of document
48  // Tight fit when parsing small doc, only 0 to 2 reallocs when parsing large doc
49  // Start at 8 when creating new document
50  int nStartSize = m_strDoc.size() / 64 + 8;
51  if ( m_aPos.size() < nStartSize )
52  m_aPos.resize( nStartSize );
53 
54  // Parse document
55  bool bWellFormed = false;
56  if ( m_strDoc.size() )
57  {
58  m_aPos[0].Clear();
59  int iPos = x_ParseElem( 0 );
60  if ( iPos > 0 )
61  {
62  m_aPos[0].iElemChild = iPos;
63  bWellFormed = true;
64  }
65  }
66 
67  // Clear indexes if parse failed or empty document
68  if ( ! bWellFormed )
69  {
70  m_aPos[0].Clear();
71  m_iPosFree = 1;
72  }
73 
74  ResetPos();
75  return bWellFormed;
76 };
77 
78 bool CMarkupSTL::IsWellFormed()
79 {
80  if ( m_aPos.size() && m_aPos[0].iElemChild )
81  return true;
82  return false;
83 }
84 
85 bool CMarkupSTL::Load( const char* szFileName )
86 {
87  // Load document from file
88  bool bResult = false;
89  FILE* fp = fopen( szFileName, "rb" );
90  if ( fp )
91  {
92  // Determine file length
93  fseek( fp, 0L, SEEK_END );
94  int nFileLen = ftell(fp);
95  fseek( fp, 0L, SEEK_SET );
96 
97  // Load string
98  allocator<char> mem;
99  allocator<char>::pointer pBuffer = mem.allocate(nFileLen+1, NULL);
100  if ( fread( pBuffer, nFileLen, 1, fp ) == 1 )
101  {
102  pBuffer[nFileLen] = '\0';
103  bResult = SetDoc( pBuffer );
104  }
105  fclose(fp);
106  mem.deallocate(pBuffer,1);
107  }
108  if ( ! bResult )
109  SetDoc(NULL);
110  MARKUP_SETDEBUGSTATE;
111  return bResult;
112 }
113 
114 bool CMarkupSTL::Save( const char* szFileName )
115 {
116  // Save document to file
117  bool bResult = false;
118  FILE* fp = fopen( szFileName, "wb" );
119  if ( fp )
120  {
121  // Save string
122  int nFileLen = m_strDoc.size();
123  if ( ! nFileLen )
124  bResult = true;
125  else if ( fwrite( m_strDoc.c_str(), nFileLen, 1, fp ) == 1 )
126  bResult = true;
127  fclose(fp);
128  }
129  return bResult;
130 }
131 
132 bool CMarkupSTL::FindElem( const char* szName )
133 {
134  // Change current position only if found
135  //
136  if ( m_aPos.size() )
137  {
138  int iPos = x_FindElem( m_iPosParent, m_iPos, szName );
139  if ( iPos )
140  {
141  // Assign new position
142  x_SetPos( m_aPos[iPos].iElemParent, iPos, 0 );
143  return true;
144  }
145  }
146  return false;
147 }
148 
149 bool CMarkupSTL::FindChildElem( const char* szName )
150 {
151  // Change current child position only if found
152  //
153  // Shorthand: call this with no current main position
154  // means find child under root element
155  if ( ! m_iPos )
156  FindElem();
157 
158  int iPosChild = x_FindElem( m_iPos, m_iPosChild, szName );
159  if ( iPosChild )
160  {
161  // Assign new position
162  int iPos = m_aPos[iPosChild].iElemParent;
163  x_SetPos( m_aPos[iPos].iElemParent, iPos, iPosChild );
164  return true;
165  }
166 
167  return false;
168 }
169 
170 std::string CMarkupSTL::GetTagName() const
171 {
172  // Return the tag name at the current main position
173  std::string strTagName;
174 
175  if ( m_iPos )
176  strTagName = x_GetTagName( m_iPos );
177  return strTagName;
178 }
179 
180 bool CMarkupSTL::IntoElem()
181 {
182  // If there is no child position and IntoElem is called it will succeed in release 6.3
183  // (A subsequent call to FindElem will find the first element)
184  // The following short-hand behavior was never part of EDOM and was misleading
185  // It would find a child element if there was no current child element position and go into it
186  // It is removed in release 6.3, this change is NOT backwards compatible!
187  // if ( ! m_iPosChild )
188  // FindChildElem();
189 
190  if ( m_iPos && m_nNodeType == MNT_ELEMENT )
191  {
192  x_SetPos( m_iPos, m_iPosChild, 0 );
193  return true;
194  }
195  return false;
196 }
197 
198 bool CMarkupSTL::OutOfElem()
199 {
200  // Go to parent element
201  if ( m_iPosParent )
202  {
203  x_SetPos( m_aPos[m_iPosParent].iElemParent, m_iPosParent, m_iPos );
204  return true;
205  }
206  return false;
207 }
208 
209 std::string CMarkupSTL::GetAttribName( int n ) const
210 {
211  // Return nth attribute name of main position
212  if ( ! m_iPos || m_nNodeType != MNT_ELEMENT )
213  return "";
214 
215  TokenPos token( m_strDoc.c_str() );
216  token.nNext = m_aPos[m_iPos].nStartL + 1;
217  for ( int nAttrib=0; nAttrib<=n; ++nAttrib )
218  if ( ! x_FindAttrib(token) )
219  return "";
220 
221  // Return substring of document
222  return x_GetToken( token );
223 }
224 
225 bool CMarkupSTL::SavePos( const char* szPosName )
226 {
227  // Save current element position in saved position map
228  if ( szPosName )
229  {
230  SavedPos savedpos;
231  savedpos.iPosParent = m_iPosParent;
232  savedpos.iPos = m_iPos;
233  savedpos.iPosChild = m_iPosChild;
234  string strPosName = szPosName;
235  m_mapSavedPos[strPosName] = savedpos;
236  return true;
237  }
238  return false;
239 }
240 
241 bool CMarkupSTL::RestorePos( const char* szPosName )
242 {
243  // Restore element position if found in saved position map
244  if ( szPosName )
245  {
246  std::string strPosName = szPosName;
247  mapSavedPosT::const_iterator iterSavePos = m_mapSavedPos.find( strPosName );
248  if ( iterSavePos != m_mapSavedPos.end() )
249  {
250  SavedPos savedpos = (*iterSavePos).second;
251  x_SetPos( savedpos.iPosParent, savedpos.iPos, savedpos.iPosChild );
252  return true;
253  }
254  }
255  return false;
256 }
257 
258 bool CMarkupSTL::GetOffsets( int& nStart, int& nEnd ) const
259 {
260  // Return document offsets of current main position element
261  // This is not part of EDOM but is used by the Markup project
262  if ( m_iPos )
263  {
264  nStart = m_aPos[m_iPos].nStartL;
265  nEnd = m_aPos[m_iPos].nEndR;
266  return true;
267  }
268  return false;
269 }
270 
271 std::string CMarkupSTL::GetChildSubDoc() const
272 {
273  if ( m_iPosChild )
274  {
275  int nL = m_aPos[m_iPosChild].nStartL;
276  int nR = m_aPos[m_iPosChild].nEndR + 1;
277  TokenPos token( m_strDoc.c_str() );
278  token.nNext = nR;
279  if ( ! x_FindToken(token) || m_strDoc[token.nL] == '<' )
280  nR = token.nL;
281  return m_strDoc.substr( nL, nR - nL );
282  }
283  return "";
284 }
285 
286 bool CMarkupSTL::RemoveElem()
287 {
288  // Remove current main position element
289  if ( m_iPos && m_nNodeType == MNT_ELEMENT )
290  {
291  int iPos = x_RemoveElem( m_iPos );
292  x_SetPos( m_iPosParent, iPos, 0 );
293  return true;
294  }
295  return false;
296 }
297 
298 bool CMarkupSTL::RemoveChildElem()
299 {
300  // Remove current child position element
301  if ( m_iPosChild )
302  {
303  int iPosChild = x_RemoveElem( m_iPosChild );
304  x_SetPos( m_iPosParent, m_iPos, iPosChild );
305  return true;
306  }
307  return false;
308 }
309 
311 // Private Methods
313 
314 int CMarkupSTL::x_GetFreePos()
315 {
316  //
317  // This returns the index of the next unused ElemPos in the array
318  //
319  if ( m_iPosFree == m_aPos.size() )
320  m_aPos.resize( m_iPosFree + m_iPosFree / 2 );
321  ++m_iPosFree;
322  return m_iPosFree - 1;
323 }
324 
325 int CMarkupSTL::x_ReleasePos()
326 {
327  //
328  // This decrements the index of the next unused ElemPos in the array
329  // allowing the element index returned by GetFreePos() to be reused
330  //
331  --m_iPosFree;
332  return 0;
333 }
334 
335 int CMarkupSTL::x_ParseError( const char* szError, const char* szName )
336 {
337  if ( szName )
338  {
339  char szFormat[300];
340  sprintf( szFormat, szError, szName );
341  m_strError = szFormat;
342  }
343  else
344  m_strError = szError;
345  x_ReleasePos();
346  return -1;
347 }
348 
349 int CMarkupSTL::x_ParseElem( int iPosParent )
350 {
351  // This is either called by SetDoc, x_AddSubDoc, or itself recursively
352  // m_aPos[iPosParent].nEndL is where to start parsing for the child element
353  // This returns the new position if a tag is found, otherwise zero
354  // In all cases we need to get a new ElemPos, but release it if unused
355  //
356  int iPos = x_GetFreePos();
357  m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
358  m_aPos[iPos].iElemParent = iPosParent;
359  m_aPos[iPos].iElemChild = 0;
360  m_aPos[iPos].iElemNext = 0;
361 
362  // Start Tag
363  // A loop is used to ignore all remarks tags and special tags
364  // i.e. <?xml version="1.0"?>, and <!-- comment here -->
365  // So any tag beginning with ? or ! is ignored
366  // Loop past ignored tags
367  TokenPos token( m_strDoc.c_str() );
368  token.nNext = m_aPos[iPosParent].nEndL;
369  std::string strName;
370  while ( strName.empty() )
371  {
372  // Look for left angle bracket of start tag
373  m_aPos[iPos].nStartL = token.nNext;
374  if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nStartL, '<' ) )
375  return x_ParseError( "Element tag not found" );
376 
377  // Set parent's End tag to start looking from here (or later)
378  m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
379 
380  // Determine whether this is an element, or bypass other type of node
381  token.nNext = m_aPos[iPos].nStartL + 1;
382  if ( x_FindToken( token ) )
383  {
384  if ( token.bIsString )
385  return x_ParseError( "Tag starts with quote" );
386  char cFirstChar = m_strDoc[token.nL];
387  if ( cFirstChar == '?' || cFirstChar == '!' )
388  {
389  token.nNext = m_aPos[iPos].nStartL;
390  if ( ! x_ParseNode(token) )
391  return x_ParseError( "Invalid node" );
392  }
393  else if ( cFirstChar != '/' )
394  {
395  strName = x_GetToken( token );
396  // Look for end of tag
397  if ( ! x_FindChar(token.szDoc, token.nNext, '>') )
398  return x_ParseError( "End of tag not found" );
399  }
400  else
401  return x_ReleasePos(); // probably end tag of parent
402  }
403  else
404  return x_ParseError( "Abrupt end within tag" );
405  }
406  m_aPos[iPos].nStartR = token.nNext;
407 
408  // Is ending mark within start tag, i.e. empty element?
409  if ( m_strDoc[m_aPos[iPos].nStartR-1] == '/' )
410  {
411  // Empty element
412  // Close tag left is set to ending mark, and right to open tag right
413  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1;
414  m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
415  }
416  else // look for end tag
417  {
418  // Element probably has contents
419  // Determine where to start looking for left angle bracket of end tag
420  // This is done by recursively parsing the contents of this element
421  int iInner, iInnerPrev = 0;
422  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
423  while ( (iInner = x_ParseElem( iPos )) > 0 )
424  {
425  // Set links to iInner
426  if ( iInnerPrev )
427  m_aPos[iInnerPrev].iElemNext = iInner;
428  else
429  m_aPos[iPos].iElemChild = iInner;
430  iInnerPrev = iInner;
431 
432  // Set offset to reflect child
433  m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
434  }
435  if ( iInner == -1 )
436  return -1;
437 
438  // Look for left angle bracket of end tag
439  if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nEndL, '<' ) )
440  return x_ParseError( "End tag of %s element not found", strName.c_str() );
441 
442  // Look through tokens of end tag
443  token.nNext = m_aPos[iPos].nEndL + 1;
444  int nTokenCount = 0;
445  while ( x_FindToken( token ) )
446  {
447  ++nTokenCount;
448  if ( ! token.bIsString )
449  {
450  // Is first token not an end slash mark?
451  if ( nTokenCount == 1 && m_strDoc[token.nL] != '/' )
452  return x_ParseError( "Expecting end tag of element %s", strName.c_str() );
453 
454  else if ( nTokenCount == 2 && ! token.Match(strName.c_str()) )
455  return x_ParseError( "End tag does not correspond to %s", strName.c_str() );
456 
457  // Else is it a right angle bracket?
458  else if ( m_strDoc[token.nL] == '>' )
459  break;
460  }
461  }
462 
463  // Was a right angle bracket not found?
464  if ( ! token.szDoc[token.nL] || nTokenCount < 2 )
465  return x_ParseError( "End tag not completed for element %s", strName.c_str() );
466  m_aPos[iPos].nEndR = token.nL;
467  }
468 
469  // Successfully parsed element (and contained elements)
470  return iPos;
471 }
472 
473 bool CMarkupSTL::x_FindChar( const char* szDoc, int& nChar, char c )
474 {
475  // static function
476  const char* pChar = &szDoc[nChar];
477  while ( *pChar && *pChar != c )
478  pChar += 1;
479  nChar = pChar - szDoc;
480  if ( ! *pChar )
481  return false;
482  /*
483  while ( szDoc[nChar] && szDoc[nChar] != c )
484  nChar += _tclen( &szDoc[nChar] );
485  if ( ! szDoc[nChar] )
486  return false;
487  */
488  return true;
489 }
490 
491 bool CMarkupSTL::x_FindToken( CMarkupSTL::TokenPos& token )
492 {
493  // Starting at token.nNext, bypass whitespace and find the next token
494  // returns true on success, members of token point to token
495  // returns false on end of document, members point to end of document
496  const char* szDoc = token.szDoc;
497  int nChar = token.nNext;
498  token.bIsString = false;
499 
500  // By-pass leading whitespace
501  while ( szDoc[nChar] && strchr(" \t\n\r",szDoc[nChar]) )
502  ++nChar;
503  if ( ! szDoc[nChar] )
504  {
505  // No token was found before end of document
506  token.nL = nChar;
507  token.nR = nChar;
508  token.nNext = nChar;
509  return false;
510  }
511 
512  // Is it an opening quote?
513  char cFirstChar = szDoc[nChar];
514  if ( cFirstChar == '\"' || cFirstChar == '\'' )
515  {
516  token.bIsString = true;
517 
518  // Move past opening quote
519  ++nChar;
520  token.nL = nChar;
521 
522  // Look for closing quote
523  x_FindChar( token.szDoc, nChar, cFirstChar );
524 
525  // Set right to before closing quote
526  token.nR = nChar - 1;
527 
528  // Set nChar past closing quote unless at end of document
529  if ( szDoc[nChar] )
530  ++nChar;
531  }
532  else
533  {
534  // Go until special char or whitespace
535  token.nL = nChar;
536  while ( szDoc[nChar] && ! strchr(" \t\n\r<>=\\/?!",szDoc[nChar]) )
537  nChar += 1;
538 
539  // Adjust end position if it is one special char
540  if ( nChar == token.nL )
541  ++nChar; // it is a special char
542  token.nR = nChar - 1;
543  }
544 
545  // nNext points to one past last char of token
546  token.nNext = nChar;
547  return true;
548 }
549 
550 std::string CMarkupSTL::x_GetToken( const CMarkupSTL::TokenPos& token ) const
551 {
552  // The token contains indexes into the document identifying a small substring
553  // Build the substring from those indexes and return it
554  if ( token.nL > token.nR )
555  return "";
556  return m_strDoc.substr( token.nL,
557  token.nR - token.nL + ((token.nR<m_strDoc.size())? 1:0) );
558 }
559 
560 int CMarkupSTL::x_FindElem( int iPosParent, int iPos, const char* szPath )
561 {
562  // If szPath is NULL or empty, go to next sibling element
563  // Otherwise go to next sibling element with matching path
564  //
565  if ( iPos )
566  iPos = m_aPos[iPos].iElemNext;
567  else
568  iPos = m_aPos[iPosParent].iElemChild;
569 
570  // Finished here if szPath not specified
571  if ( szPath == NULL || !szPath[0] )
572  return iPos;
573 
574  // Search
575  TokenPos token( m_strDoc.c_str() );
576  while ( iPos )
577  {
578  // Compare tag name
579  token.nNext = m_aPos[iPos].nStartL + 1;
580  x_FindToken( token ); // Locate tag name
581  if ( token.Match(szPath) )
582  return iPos;
583  iPos = m_aPos[iPos].iElemNext;
584  }
585  return 0;
586 }
587 
588 int CMarkupSTL::x_ParseNode( CMarkupSTL::TokenPos& token )
589 {
590  // Call this with token.nNext set to the start of the node
591  // This returns the node type and token.nNext set to the char after the node
592  // If the node is not found or an element, token.nR is not determined
593  // White space between elements is a text node
594  int nTypeFound = 0;
595  const char* szDoc = token.szDoc;
596  token.nL = token.nNext;
597  if ( szDoc[token.nL] == '<' )
598  {
599  // Started with <, could be:
600  // <!--...--> comment
601  // <!DOCTYPE ...> dtd
602  // <?target ...?> processing instruction
603  // <![CDATA[...]]> cdata section
604  // <NAME ...> element
605  //
606  if ( ! szDoc[token.nL+1] || ! szDoc[token.nL+2] )
607  return 0;
608  char cFirstChar = szDoc[token.nL+1];
609  const char* szEndOfNode = NULL;
610  if ( cFirstChar == '?' )
611  {
612  nTypeFound = MNT_PROCESSING_INSTRUCTION; // processing instruction
613  szEndOfNode = "?>";
614  }
615  else if ( cFirstChar == '!' )
616  {
617  char cSecondChar = szDoc[token.nL+2];
618  if ( cSecondChar == '[' )
619  {
620  nTypeFound = MNT_CDATA_SECTION;
621  szEndOfNode = "]]>";
622  }
623  else if ( cSecondChar == '-' )
624  {
625  nTypeFound = MNT_COMMENT;
626  szEndOfNode = "-->";
627  }
628  else
629  {
630  // Document type requires tokenizing because of strings and brackets
631  nTypeFound = 0;
632  int nBrackets = 0;
633  while ( x_FindToken(token) )
634  {
635  if ( ! token.bIsString )
636  {
637  char cChar = szDoc[token.nL];
638  if ( cChar == '[' )
639  ++nBrackets;
640  else if ( cChar == ']' )
641  --nBrackets;
642  else if ( nBrackets == 0 && cChar == '>' )
643  {
644  nTypeFound = MNT_DOCUMENT_TYPE;
645  break;
646  }
647  }
648  }
649  if ( ! nTypeFound )
650  return 0;
651  }
652  }
653  else if ( cFirstChar == '/' )
654  {
655  // End tag means no node found within parent element
656  return 0;
657  }
658  else
659  {
660  nTypeFound = MNT_ELEMENT;
661  }
662 
663  // Search for end of node if not found yet
664  if ( szEndOfNode )
665  {
666  const char* pEnd = strstr( &szDoc[token.nNext], szEndOfNode );
667  if ( ! pEnd )
668  return 0; // not well-formed
669  token.nNext = (pEnd - szDoc) + strlen(szEndOfNode);
670  }
671  }
672  else if ( szDoc[token.nL] )
673  {
674  // It is text or whitespace because it did not start with <
675  nTypeFound = MNT_WHITESPACE;
676  if ( x_FindToken(token) )
677  {
678  if ( szDoc[token.nL] == '<' )
679  token.nNext = token.nL;
680  else
681  {
682  nTypeFound = MNT_TEXT;
683  x_FindChar( token.szDoc, token.nNext, '<' );
684  }
685  }
686  }
687  return nTypeFound;
688 }
689 
690 std::string CMarkupSTL::x_GetTagName( int iPos ) const
691 {
692  // Return the tag name at specified element
693  TokenPos token( m_strDoc.c_str() );
694  token.nNext = m_aPos[iPos].nStartL + 1;
695  if ( ! iPos || ! x_FindToken( token ) )
696  return "";
697 
698  // Return substring of document
699  return x_GetToken( token );
700 }
701 
702 bool CMarkupSTL::x_FindAttrib( CMarkupSTL::TokenPos& token, const char* szAttrib ) const
703 {
704  // If szAttrib is NULL find next attrib, otherwise find named attrib
705  // Return true if found
706  int nAttrib = 0;
707  for ( int nCount = 0; x_FindToken(token); ++nCount )
708  {
709  if ( ! token.bIsString )
710  {
711  // Is it the right angle bracket?
712  if ( m_strDoc[token.nL] == '>' || m_strDoc[token.nL] == '/' )
713  break; // attrib not found
714 
715  // Equal sign
716  if ( m_strDoc[token.nL] == '=' )
717  continue;
718 
719  // Potential attribute
720  if ( ! nAttrib && nCount )
721  {
722  // Attribute name search?
723  if ( ! szAttrib || ! szAttrib[0] )
724  return true; // return with token at attrib name
725 
726  // Compare szAttrib
727  if ( token.Match(szAttrib) )
728  nAttrib = nCount;
729  }
730  }
731  else if ( nAttrib && nCount == nAttrib + 2 )
732  {
733  return true;
734  }
735  }
736 
737  // Not found
738  return false;
739 }
740 
741 std::string CMarkupSTL::x_GetAttrib( int iPos, const char* szAttrib ) const
742 {
743  // Return the value of the attrib at specified element
744  if ( ! iPos || m_nNodeType != MNT_ELEMENT )
745  return "";
746 
747  TokenPos token( m_strDoc.c_str() );
748  token.nNext = m_aPos[iPos].nStartL + 1;
749  if ( szAttrib && x_FindAttrib( token, szAttrib ) )
750  return x_TextFromDoc( token.nL, token.nR - ((token.nR<m_strDoc.size())?0:1) );
751  return "";
752 }
753 
754 bool CMarkupSTL::x_SetAttrib( int iPos, const char* szAttrib, int nValue )
755 {
756  // Convert integer to string and call SetChildAttrib
757  char szVal[25];
758  sprintf( szVal, "%d", nValue );
759  return x_SetAttrib( iPos, szAttrib, szVal );
760 }
761 
762 bool CMarkupSTL::x_SetAttrib( int iPos, const char* szAttrib, const char* szValue )
763 {
764  // Set attribute in iPos element
765  if ( ! iPos || m_nNodeType != MNT_ELEMENT )
766  return false;
767 
768  TokenPos token( m_strDoc.c_str() );
769  token.nNext = m_aPos[iPos].nStartL + 1;
770  int nInsertAt, nReplace = 0;
771  std::string strInsert;
772  if ( x_FindAttrib( token, szAttrib ) )
773  {
774  // Decision: for empty value leaving attrib="" instead of removing attrib
775  // Replace value only
776  strInsert = x_TextToDoc( szValue, true );
777  nInsertAt = token.nL;
778  nReplace = token.nR-token.nL+1;
779  }
780  else
781  {
782  // Insert string name value pair
783  std::string strFormat;
784  strFormat = " ";
785  strFormat += szAttrib;
786  strFormat += "=\"";
787  strFormat += x_TextToDoc( szValue, true );
788  strFormat += "\"";
789  strInsert = strFormat;
790 
791  // take into account whether it is an empty element
792  nInsertAt = m_aPos[iPos].nStartR - (m_aPos[iPos].IsEmptyElement()?1:0);
793  }
794 
795  x_DocChange( nInsertAt, nReplace, strInsert );
796  int nAdjust = strInsert.size() - nReplace;
797  m_aPos[iPos].nStartR += nAdjust;
798  m_aPos[iPos].AdjustEnd( nAdjust );
799  x_Adjust( iPos, nAdjust );
800  MARKUP_SETDEBUGSTATE;
801  return true;
802 }
803 
804 bool CMarkupSTL::x_CreateNode( std::string& strNode, int nNodeType, const char* szText )
805 {
806  // Set strNode based on nNodeType and szText
807  // Return false if szText would jeopardize well-formed document
808  //
809  switch ( nNodeType )
810  {
811  case MNT_CDATA_SECTION:
812  if ( strstr(szText,"]]>") != NULL )
813  return false;
814  strNode = "<![CDATA[";
815  strNode += szText;
816  strNode += "]]>";
817  break;
818  }
819  return true;
820 }
821 
822 bool CMarkupSTL::x_SetData( int iPos, const char* szData, int nCDATA )
823 {
824  // Set data at specified position
825  // if nCDATA==1, set content of element to a CDATA Section
826  std::string strInsert;
827 
828  // Set data in iPos element
829  if ( ! iPos || m_aPos[iPos].iElemChild )
830  return false;
831 
832  // Build strInsert from szData based on nCDATA
833  // If CDATA section not valid, use parsed text (PCDATA) instead
834  if ( nCDATA != 0 )
835  if ( ! x_CreateNode(strInsert, MNT_CDATA_SECTION, szData) )
836  nCDATA = 0;
837  if ( nCDATA == 0 )
838  strInsert = x_TextToDoc( szData );
839 
840  // Decide where to insert
841  int nInsertAt, nReplace;
842  if ( m_aPos[iPos].IsEmptyElement() )
843  {
844  nInsertAt = m_aPos[iPos].nEndL;
845  nReplace = 1;
846 
847  // Pre-adjust since <NAME/> becomes <NAME>data</NAME>
848  std::string strTagName = x_GetTagName( iPos );
849  m_aPos[iPos].nStartR -= 1;
850  m_aPos[iPos].nEndL -= (1 + strTagName.size());
851  std::string strFormat;
852  strFormat = ">";
853  strFormat += strInsert;
854  strFormat += "</";
855  strFormat += strTagName;
856  strInsert = strFormat;
857  }
858  else
859  {
860  nInsertAt = m_aPos[iPos].nStartR+1;
861  nReplace = m_aPos[iPos].nEndL - m_aPos[iPos].nStartR - 1;
862  }
863  x_DocChange( nInsertAt, nReplace, strInsert );
864  int nAdjust = strInsert.size() - nReplace;
865  x_Adjust( iPos, nAdjust );
866  m_aPos[iPos].AdjustEnd( nAdjust );
867  MARKUP_SETDEBUGSTATE;
868  return true;
869 }
870 
871 std::string CMarkupSTL::x_GetData( int iPos ) const
872 {
873  // Return a string representing data between start and end tag
874  // Return empty string if there are any children elements
875  if ( ! m_aPos[iPos].iElemChild && ! m_aPos[iPos].IsEmptyElement() )
876  {
877  // See if it is a CDATA section
878  TokenPos token( m_strDoc.c_str() );
879  token.nNext = m_aPos[iPos].nStartR+1;
880  if ( x_FindToken( token ) && m_strDoc[token.nL] == '<'
881  && token.nL + 11 < m_aPos[iPos].nEndL
882  && strncmp( &token.szDoc[token.nL+1], "![CDATA[", 8 ) == 0 )
883  {
884  int nEndCDATA = m_strDoc.find( "]]>", token.nNext );
885  if ( nEndCDATA != std::string::npos && nEndCDATA < m_aPos[iPos].nEndL )
886  {
887  return m_strDoc.substr( token.nL+9, nEndCDATA-token.nL-9 );
888  }
889  }
890  return x_TextFromDoc( m_aPos[iPos].nStartR+1, m_aPos[iPos].nEndL-1 );
891  }
892  return "";
893 }
894 
895 std::string CMarkupSTL::x_TextToDoc( const char* szText, bool bAttrib ) const
896 {
897  //
898  // &lt; less than
899  // &amp; ampersand
900  // &gt; greater than
901  //
902  // and for attributes:
903  //
904  // &apos; apostrophe or single quote
905  // &quot; double quote
906  //
907  static char* szaReplace[] = { "&lt;","&amp;","&gt;","&apos;","&quot;" };
908  const char* pFind = bAttrib?"<&>\'\"":"<&>";
909  const char* pSource = szText;
910  std::string strResult;
911  int nLen = strlen( szText );
912  strResult.reserve( nLen + nLen / 10 );
913  char cSource = *pSource;
914  char* pFound;
915  while ( cSource )
916  {
917  if ( (pFound=strchr((char *) pFind,cSource)) != NULL )
918  {
919  pFound = szaReplace[pFound-pFind];
920  strResult.append(pFound);
921  }
922  else
923  {
924  strResult += cSource;
925  }
926  cSource = *(++pSource);
927  }
928  return strResult;
929 }
930 
931 std::string CMarkupSTL::x_TextFromDoc( int nLeft, int nRight ) const
932 {
933  //
934  // Conveniently the result is always the same or shorter in length
935  //
936  static char* szaCode[] = { "lt;","amp;","gt;","apos;","quot;" };
937  static int anCodeLen[] = { 3,4,3,5,5 };
938  static char* szSymbol = "<&>\'\"";
939  std::string strResult;
940  strResult.reserve( nRight - nLeft + 1 );
941  const char* pSource = m_strDoc.c_str();
942  int nChar = nLeft;
943  char cSource = pSource[nChar];
944  while ( nChar <= nRight )
945  {
946  if ( cSource == '&' )
947  {
948  // If no match is found it means XML doc is invalid
949  // no devastating harm done, ampersand code will just be left in result
950  for ( int nMatch = 0; nMatch < 5; ++nMatch )
951  {
952  if ( nChar <= nRight - anCodeLen[nMatch]
953  && strncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
954  {
955  cSource = szSymbol[nMatch];
956  nChar += anCodeLen[nMatch];
957  break;
958  }
959  }
960  }
961  strResult += cSource;
962  nChar++;
963  cSource = pSource[nChar];
964  }
965  return strResult;
966 }
967 
968 void CMarkupSTL::x_DocChange( int nLeft, int nReplace, const std::string& strInsert )
969 {
970  // Insert strInsert int m_strDoc at nLeft replacing nReplace chars
971  //
972  m_strDoc.replace( nLeft, nReplace, strInsert);
973 }
974 
975 void CMarkupSTL::x_Adjust( int iPos, int nShift, bool bAfterPos )
976 {
977  // Loop through affected elements and adjust indexes
978  // Algorithm:
979  // 1. update children unless bAfterPos
980  // (if no children or bAfterPos is true, end tag of iPos not affected)
981  // 2. update next siblings and their children
982  // 3. go up until there is a next sibling of a parent and update end tags
983  // 4. step 2
984  int iPosTop = m_aPos[iPos].iElemParent;
985  bool bPosFirst = bAfterPos; // mark as first to skip its children
986  while ( iPos )
987  {
988  // Were we at containing parent of affected position?
989  bool bPosTop = false;
990  if ( iPos == iPosTop )
991  {
992  // Move iPosTop up one towards root
993  iPosTop = m_aPos[iPos].iElemParent;
994  bPosTop = true;
995  }
996 
997  // Traverse to the next update position
998  if ( ! bPosTop && ! bPosFirst && m_aPos[iPos].iElemChild )
999  {
1000  // Depth first
1001  iPos = m_aPos[iPos].iElemChild;
1002  }
1003  else if ( m_aPos[iPos].iElemNext )
1004  {
1005  iPos = m_aPos[iPos].iElemNext;
1006  }
1007  else
1008  {
1009  // Look for next sibling of a parent of iPos
1010  // When going back up, parents have already been done except iPosTop
1011  while ( (iPos=m_aPos[iPos].iElemParent) != 0 && iPos != iPosTop )
1012  if ( m_aPos[iPos].iElemNext )
1013  {
1014  iPos = m_aPos[iPos].iElemNext;
1015  break;
1016  }
1017  }
1018  bPosFirst = false;
1019 
1020  // Shift indexes at iPos
1021  if ( iPos != iPosTop )
1022  m_aPos[iPos].AdjustStart( nShift );
1023  m_aPos[iPos].AdjustEnd( nShift );
1024  }
1025 }
1026 
1027 void CMarkupSTL::x_LocateNew( int iPosParent, int& iPosRel, int& nOffset, int nLength, int nFlags )
1028 {
1029  // Determine where to insert new element or node
1030  //
1031  bool bInsert = (nFlags&1)?true:false;
1032  bool bHonorWhitespace = (nFlags&2)?true:false;
1033 
1034  int nStartL;
1035  if ( nLength )
1036  {
1037  // Located at a non-element node
1038  if ( bInsert )
1039  nStartL = nOffset;
1040  else
1041  nStartL = nOffset + nLength;
1042  }
1043  else if ( iPosRel )
1044  {
1045  // Located at an element
1046  if ( bInsert ) // precede iPosRel
1047  nStartL = m_aPos[iPosRel].nStartL;
1048  else // follow iPosRel
1049  nStartL = m_aPos[iPosRel].nEndR + 1;
1050  }
1051  else if ( m_aPos[iPosParent].IsEmptyElement() )
1052  {
1053  // Parent has no separate end tag, so split empty element
1054  nStartL = m_aPos[iPosParent].nStartR;
1055  }
1056  else
1057  {
1058  if ( bInsert ) // after start tag
1059  nStartL = m_aPos[iPosParent].nStartR + 1;
1060  else // before end tag
1061  nStartL = m_aPos[iPosParent].nEndL;
1062  }
1063 
1064  // Go up to start of next node, unless its splitting an empty element
1065  if ( ! bHonorWhitespace && ! m_aPos[iPosParent].IsEmptyElement() )
1066  {
1067  TokenPos token( m_strDoc.c_str() );
1068  token.nNext = nStartL;
1069  if ( ! x_FindToken(token) || m_strDoc[token.nL] == '<' )
1070  nStartL = token.nL;
1071  }
1072 
1073  // Determine iPosBefore
1074  int iPosBefore = 0;
1075  if ( iPosRel )
1076  {
1077  if ( bInsert )
1078  {
1079  // Is iPosRel past first sibling?
1080  int iPosPrev = m_aPos[iPosParent].iElemChild;
1081  if ( iPosPrev != iPosRel )
1082  {
1083  // Find previous sibling of iPosRel
1084  while ( m_aPos[iPosPrev].iElemNext != iPosRel )
1085  iPosPrev = m_aPos[iPosPrev].iElemNext;
1086  iPosBefore = iPosPrev;
1087  }
1088  }
1089  else
1090  {
1091  iPosBefore = iPosRel;
1092  }
1093  }
1094  else if ( m_aPos[iPosParent].iElemChild )
1095  {
1096  if ( ! bInsert )
1097  {
1098  // Find last element under iPosParent
1099  int iPosLast = m_aPos[iPosParent].iElemChild;
1100  int iPosNext = iPosLast;
1101  while ( iPosNext )
1102  {
1103  iPosLast = iPosNext;
1104  iPosNext = m_aPos[iPosNext].iElemNext;
1105  }
1106  iPosBefore = iPosLast;
1107  }
1108  }
1109 
1110  nOffset = nStartL;
1111  iPosRel = iPosBefore;
1112 }
1113 
1114 bool CMarkupSTL::x_AddElem( const char* szName, const char* szValue, bool bInsert, bool bAddChild )
1115 {
1116  if ( bAddChild )
1117  {
1118  // Adding a child element under main position
1119  if ( ! m_iPos )
1120  return false;
1121  }
1122  else if ( m_iPosParent == 0 )
1123  {
1124  // Adding root element
1125  if ( IsWellFormed() )
1126  return false;
1127 
1128  // Locate after any version and DTD
1129  m_aPos[0].nEndL = m_strDoc.size();
1130  }
1131 
1132  // Locate where to add element relative to current node
1133  int iPosParent, iPosBefore, nOffset = 0, nLength = 0;
1134  if ( bAddChild )
1135  {
1136  iPosParent = m_iPos;
1137  iPosBefore = m_iPosChild;
1138  }
1139  else
1140  {
1141  iPosParent = m_iPosParent;
1142  iPosBefore = m_iPos;
1143  }
1144  int nFlags = bInsert?1:0;
1145  x_LocateNew( iPosParent, iPosBefore, nOffset, nLength, nFlags );
1146  bool bEmptyParent = m_aPos[iPosParent].IsEmptyElement();
1147  if ( bEmptyParent )
1148  nOffset += 2; // include CRLF
1149 
1150  // Create element and modify positions of affected elements
1151  // If no szValue is specified, an empty element is created
1152  // i.e. either <NAME>value</NAME> or <NAME/>
1153  //
1154  int iPos = x_GetFreePos();
1155  m_aPos[iPos].nStartL = nOffset;
1156 
1157  // Set links
1158  m_aPos[iPos].iElemParent = iPosParent;
1159  m_aPos[iPos].iElemChild = 0;
1160  m_aPos[iPos].iElemNext = 0;
1161  if ( iPosBefore )
1162  {
1163  // Link in after iPosBefore
1164  m_aPos[iPos].iElemNext = m_aPos[iPosBefore].iElemNext;
1165  m_aPos[iPosBefore].iElemNext = iPos;
1166  }
1167  else
1168  {
1169  // First child
1170  m_aPos[iPos].iElemNext = m_aPos[iPosParent].iElemChild;
1171  m_aPos[iPosParent].iElemChild = iPos;
1172  }
1173 
1174  // Create string for insert
1175  std::string strInsert;
1176  int nLenName = strlen(szName);
1177  int nLenValue = szValue? strlen(szValue) : 0;
1178  if ( ! nLenValue )
1179  {
1180  // <NAME/> empty element
1181  strInsert = "<";
1182  strInsert += szName;
1183  strInsert += "/>\r\n";
1184  m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 2;
1185  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR - 1;
1186  m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + 1;
1187  }
1188  else
1189  {
1190  // <NAME>value</NAME>
1191  std::string strValue = x_TextToDoc( szValue );
1192  nLenValue = strValue.size();
1193  strInsert = "<";
1194  strInsert += szName;
1195  strInsert += ">";
1196  strInsert += strValue;
1197  strInsert += "</";
1198  strInsert += szName;
1199  strInsert += ">\r\n";
1200  m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 1;
1201  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + nLenValue + 1;
1202  m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + nLenName + 2;
1203  }
1204 
1205  // Insert
1206  int nReplace = 0, nLeft = m_aPos[iPos].nStartL;
1207  if ( bEmptyParent )
1208  {
1209  std::string strParentTagName = x_GetTagName(iPosParent);
1210  std::string strFormat;
1211  strFormat = ">\r\n";
1212  strFormat += strInsert;
1213  strFormat += "</";
1214  strFormat += strParentTagName;
1215  strInsert = strFormat;
1216  nLeft -= 3;
1217  nReplace = 1;
1218  // x_Adjust is going to update all affected indexes by one amount
1219  // This will satisfy all except the empty parent
1220  // Here we pre-adjust for the empty parent
1221  // The empty tag slash is removed
1222  m_aPos[iPosParent].nStartR -= 1;
1223  // For the newly created end tag, see the following example:
1224  // <A/> (len 4) becomes <A><B/></A> (len 11)
1225  // In x_Adjust everything will be adjusted 11 - 4 = 7
1226  // But the nEndL of element A should only be adjusted 5
1227  m_aPos[iPosParent].nEndL -= (strParentTagName.size() + 1);
1228  }
1229  x_DocChange( nLeft, nReplace, strInsert );
1230  x_Adjust( iPos, strInsert.size() - nReplace );
1231 
1232  if ( bAddChild )
1233  x_SetPos( m_iPosParent, iPosParent, iPos );
1234  else
1235  x_SetPos( iPosParent, iPos, 0 );
1236  return true;
1237 }
1238 
1239 bool CMarkupSTL::x_AddSubDoc( const char* szSubDoc, bool bInsert, bool bAddChild )
1240 {
1241  // Add subdocument, parse, and modify positions of affected elements
1242  //
1243  int nOffset = 0, iPosParent, iPosBefore;
1244  if ( bAddChild )
1245  {
1246  // Add a subdocument under main position, after current child position
1247  if ( ! m_iPos )
1248  return false;
1249  iPosParent = m_iPos;
1250  iPosBefore = m_iPosChild;
1251  }
1252  else
1253  {
1254  iPosParent = m_iPosParent;
1255  iPosBefore = m_iPos;
1256  }
1257  int nFlags = bInsert?1:0;
1258  x_LocateNew( iPosParent, iPosBefore, nOffset, 0, nFlags );
1259  bool bEmptyParent = m_aPos[iPosParent].IsEmptyElement();
1260  if ( bEmptyParent )
1261  nOffset += 2; // include CRLF
1262 
1263  // if iPosBefore is NULL, insert as first element under parent
1264  int nParentEndLBeforeAdd = m_aPos[iPosParent].nEndL;
1265  int iPosFreeBeforeAdd = m_iPosFree;
1266 
1267  // Skip version tag or DTD at start of subdocument
1268  TokenPos token( szSubDoc );
1269  int nNodeType = x_ParseNode( token );
1270  while ( nNodeType && nNodeType != MNT_ELEMENT )
1271  {
1272  token.szDoc = &szSubDoc[token.nNext];
1273  token.nNext = 0;
1274  nNodeType = x_ParseNode( token );
1275  }
1276  std::string strInsert = token.szDoc;
1277 
1278  // Insert subdocument
1279  m_aPos[iPosParent].nEndL = nOffset;
1280  int nReplace = 0, nLeft = nOffset;
1281  std::string strParentTagName;
1282  if ( bEmptyParent )
1283  {
1284  strParentTagName = x_GetTagName(iPosParent);
1285  std::string strFormat;
1286  strFormat = ">\r\n";
1287  strFormat += strInsert;
1288  strFormat += "</";
1289  strFormat += strParentTagName;
1290  strInsert = strFormat;
1291  m_aPos[iPosParent].nEndL = m_aPos[iPosParent].nStartR + 2;
1292  nLeft = m_aPos[iPosParent].nStartR - 1;
1293  nReplace = 1;
1294  }
1295  x_DocChange( nLeft, nReplace, strInsert );
1296 
1297  // Parse subdocument
1298  int iPos = x_ParseElem(iPosParent);
1299  m_aPos[iPosParent].nEndL = nParentEndLBeforeAdd;
1300  if ( iPos <= 0 )
1301  {
1302  // Abort because not well-formed
1303  std::string strRevert = bEmptyParent?"/":"";
1304  x_DocChange( nLeft, strInsert.size(), strRevert );
1305  m_iPosFree = iPosFreeBeforeAdd;
1306  return false;
1307  }
1308  else
1309  {
1310  // Link in parent and siblings
1311  m_aPos[iPos].iElemParent = iPosParent;
1312  if ( iPosBefore )
1313  {
1314  m_aPos[iPos].iElemNext = m_aPos[iPosBefore].iElemNext;
1315  m_aPos[iPosBefore].iElemNext = iPos;
1316  }
1317  else
1318  {
1319  m_aPos[iPos].iElemNext = m_aPos[iPosParent].iElemChild;
1320  m_aPos[iPosParent].iElemChild = iPos;
1321  }
1322 
1323  // Make empty parent pre-adjustment
1324  if ( bEmptyParent )
1325  {
1326  m_aPos[iPosParent].nStartR -= 1;
1327  m_aPos[iPosParent].nEndL -= (strParentTagName.size() + 1);
1328  }
1329 
1330  // Adjust, but don't adjust children of iPos (bAfterPos=true)
1331  x_Adjust( iPos, strInsert.size() - nReplace, true );
1332  }
1333 
1334  // Set position to top element of subdocument
1335  if ( bAddChild )
1336  x_SetPos( m_iPosParent, iPosParent, iPos );
1337  else // Main
1338  x_SetPos( m_iPosParent, iPos, 0 );
1339  return true;
1340 }
1341 
1342 int CMarkupSTL::x_RemoveElem( int iPos )
1343 {
1344  // Remove element and all contained elements
1345  // Return new position
1346  //
1347  int iPosParent = m_aPos[iPos].iElemParent;
1348 
1349  // Find previous sibling and bypass removed element
1350  // This leaves orphan positions in m_aPos array
1351  int iPosLook = m_aPos[iPosParent].iElemChild;
1352  int iPosPrev = 0;
1353  while ( iPosLook != iPos )
1354  {
1355  iPosPrev = iPosLook;
1356  iPosLook = m_aPos[iPosLook].iElemNext;
1357  }
1358  if ( iPosPrev )
1359  m_aPos[iPosPrev].iElemNext = m_aPos[iPos].iElemNext;
1360  else
1361  m_aPos[iPosParent].iElemChild = m_aPos[iPos].iElemNext;
1362 
1363  // Remove from document
1364  // Links have been changed to go around removed element
1365  // But element position and links are still valid
1366  int nAfterEnd = m_aPos[iPos].nEndR + 1;
1367  TokenPos token( m_strDoc.c_str() );
1368  token.nNext = nAfterEnd;
1369  if ( ! x_FindToken(token) || token.szDoc[token.nL] == '<' )
1370  nAfterEnd = token.nL;
1371  int nLen = nAfterEnd - m_aPos[iPos].nStartL;
1372  x_DocChange( m_aPos[iPos].nStartL, nLen, std::string() );
1373  x_Adjust( iPos, - nLen, true );
1374  return iPosPrev;
1375 }
1376 
1377 
1378 } //StdUtils
Namespace for the standard utility objects.
Definition: MarkupSTL.cpp:19