Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/mono.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlDocument.cs')
-rw-r--r-- mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlDocument.cs | 2079
1 files changed, 0 insertions, 2079 deletions
diff --git a/mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlDocument.cs b/mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlDocument.cs
deleted file mode 100644
index f79fab47996..00000000000
--- a/mcs/class/System.Web/Test/mainsoft/MainsoftWebTest/HtmlAgilityPack/HtmlDocument.cs
+++ /dev/null
@@ -1,2079 +0,0 @@
-// HtmlAgilityPack V1.0 - Simon Mourier <simonm@microsoft.com>
-using System;
-using System.IO;
-using System.Text;
-using System.Diagnostics;
-using System.Collections;
-using System.Text.RegularExpressions;
-using System.Xml;
-using System.Xml.XPath;
-
-namespace HtmlAgilityPack
-{
- /// <summary>
- /// Represents the type of parsing error.
- /// </summary>
- public enum HtmlParseErrorCode
- {
- /// <summary>
- /// A tag was not closed.
- /// </summary>
- TagNotClosed,
-
- /// <summary>
- /// A tag was not opened.
- /// </summary>
- TagNotOpened,
-
- /// <summary>
- /// There is a charset mismatch between stream and declared (META) encoding.
- /// </summary>
- CharsetMismatch,
-
- /// <summary>
- /// An end tag was not required.
- /// </summary>
- EndTagNotRequired,
-
- /// <summary>
- /// An end tag is invalid at this position.
- /// </summary>
- EndTagInvalidHere
- }
-
- /// <summary>
- /// Represents a parsing error found during document parsing.
- /// </summary>
- public class HtmlParseError
- {
- private HtmlParseErrorCode _code;
- private int _line;
- private int _linePosition;
- private int _streamPosition;
- private string _sourceText;
- private string _reason;
-
- internal HtmlParseError(
- HtmlParseErrorCode code,
- int line,
- int linePosition,
- int streamPosition,
- string sourceText,
- string reason)
- {
- _code = code;
- _line = line;
- _linePosition = linePosition;
- _streamPosition = streamPosition;
- _sourceText = sourceText;
- _reason = reason;
- }
-
- /// <summary>
- /// Gets the type of error.
- /// </summary>
- public HtmlParseErrorCode Code
- {
- get
- {
- return _code;
- }
- }
-
- /// <summary>
- /// Gets the line number of this error in the document.
- /// </summary>
- public int Line
- {
- get
- {
- return _line;
- }
- }
-
- /// <summary>
- /// Gets the column number of this error in the document.
- /// </summary>
- public int LinePosition
- {
- get
- {
- return _linePosition;
- }
- }
-
- /// <summary>
- /// Gets the absolute stream position of this error in the document, relative to the start of the document.
- /// </summary>
- public int StreamPosition
- {
- get
- {
- return _streamPosition;
- }
- }
-
- /// <summary>
- /// Gets the the full text of the line containing the error.
- /// </summary>
- public string SourceText
- {
- get
- {
- return _sourceText;
- }
- }
-
- /// <summary>
- /// Gets a description for the error.
- /// </summary>
- public string Reason
- {
- get
- {
- return _reason;
- }
- }
- }
-
- /// <summary>
- /// Represents a complete HTML document.
- /// </summary>
- public class HtmlDocument: IXPathNavigable
- {
- internal static readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node";
- internal static readonly string HtmlExceptionUseIdAttributeFalse = "You need to set UseIdAttribute property to true to enable this feature";
-
- internal Hashtable _openednodes;
- internal Hashtable _lastnodes = new Hashtable();
- internal Hashtable _nodesid;
- private HtmlNode _documentnode;
- internal string _text;
- private string _remainder;
- private int _remainderOffset;
- private HtmlNode _currentnode;
- private HtmlNode _lastparentnode;
- private HtmlAttribute _currentattribute;
- private int _index;
- private int _line;
- private int _lineposition, _maxlineposition;
- private int _c;
- private bool _fullcomment;
- private System.Text.Encoding _streamencoding;
- private System.Text.Encoding _declaredencoding;
- private ArrayList _parseerrors = new ArrayList();
- private ParseState _state, _oldstate;
- private Crc32 _crc32 = null;
- private bool _onlyDetectEncoding = false;
-
- // public props
-
- /// <summary>
- /// Defines if a checksum must be computed for the document while parsing. Default is false.
- /// </summary>
- public bool OptionComputeChecksum = false;
-
- /// <summary>
- /// Defines if declared encoding must be read from the document.
- /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
- /// Default is true.
- /// </summary>
- public bool OptionReadEncoding = true;
-
-
- /// <summary>
- /// Defines if non closed nodes will be checked at the end of parsing. Default is true.
- /// </summary>
- public bool OptionCheckSyntax = true;
-
- /// <summary>
- /// Defines if the 'id' attribute must be specifically used. Default is true.
- /// </summary>
- public bool OptionUseIdAttribute = true;
-
- /// <summary>
- /// Defines if empty nodes must be written as closed during output. Default is false.
- /// </summary>
- public bool OptionWriteEmptyNodes = false;
-
- /// <summary>
- /// Defines if output must conform to XML, instead of HTML.
- /// </summary>
- public bool OptionOutputAsXml = false;
-
- /// <summary>
- /// Defines if name must be output in uppercase. Default is false.
- /// </summary>
- public bool OptionOutputUpperCase = false;
-
- /// <summary>
- /// Defines if attribute value output must be optimized (not bound with double quotes if it is possible). Default is false.
- /// </summary>
- public bool OptionOutputOptimizeAttributeValues = false;
-
- /// <summary>
- /// Adds Debugging attributes to node. Default is false.
- /// </summary>
- public bool OptionAddDebuggingAttributes = false;
-
- /// <summary>
- /// Defines if source text must be extracted while parsing errors.
- /// If the document has a lot of errors, or cascading errors, parsing performance can be dramatically affected if set to true.
- /// Default is false.
- /// </summary>
- public bool OptionExtractErrorSourceText = false; // turning this on can dramatically slow performance if a lot of errors are detected
-
- /// <summary>
- /// Defines if closing for non closed nodes must be done at the end or directly in the document.
- /// Setting this to true can actually change how browsers render the page. Default is false.
- /// </summary>
- public bool OptionAutoCloseOnEnd = false; // close errors at the end
-
- /// <summary>
- /// Defines if LI, TR, TH, TD tags must be partially fixed when nesting errors are detected. Default is false.
- /// </summary>
- public bool OptionFixNestedTags = false; // fix li, tr, th, td tags
-
- /// <summary>
- /// Defines the maximum length of source text or parse errors. Default is 100.
- /// </summary>
- public int OptionExtractErrorSourceTextMaxLength = 100;
-
- /// <summary>
- /// Defines the default stream encoding to use. Default is System.Text.Encoding.Default.
- /// </summary>
- public System.Text.Encoding OptionDefaultStreamEncoding = System.Text.Encoding.Default;
-
- /// <summary>
- /// Defines the name of a node that will throw the StopperNodeException when found as an end node. Default is null.
- /// </summary>
- public string OptionStopperNodeName = null;
-
- /// <summary>
- /// Gets the remaining text.
- /// Will always be null if OptionStopperNodeName is null.
- /// </summary>
- public string Remainder
- {
- get
- {
- return _remainder;
- }
- }
-
- /// <summary>
- /// Gets the offset of Remainder in the original Html text.
- /// If OptionStopperNodeName is null, this will return the length of the original Html text.
- /// </summary>
- public int RemainderOffset
- {
- get
- {
- return _remainderOffset;
- }
- }
-
- /// <summary>
- /// Gets a list of parse errors found in the document.
- /// </summary>
- public ArrayList ParseErrors
- {
- get
- {
- return _parseerrors;
- }
- }
-
- /// <summary>
- /// Gets the document's stream encoding.
- /// </summary>
- public System.Text.Encoding StreamEncoding
- {
- get
- {
- return _streamencoding;
- }
- }
-
- /// <summary>
- /// Gets the document's declared encoding.
- /// Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
- /// </summary>
- public System.Text.Encoding DeclaredEncoding
- {
- get
- {
- return _declaredencoding;
- }
- }
-
- /// <summary>
- /// Creates an instance of an HTML document.
- /// </summary>
- public HtmlDocument()
- {
- _documentnode = CreateNode(HtmlNodeType.Document, 0);
- }
-
- internal HtmlNode GetXmlDeclaration()
- {
- if (!_documentnode.HasChildNodes)
- {
- return null;
- }
-
- foreach(HtmlNode node in _documentnode._childnodes)
- {
- if (node.Name == "?xml") // it's ok, names are case sensitive
- {
- return node;
- }
- }
- return null;
- }
-
- /// <summary>
- /// Applies HTML encoding to a specified string.
- /// </summary>
- /// <param name="html">The input string to encode. May not be null.</param>
- /// <returns>The encoded string.</returns>
- public static string HtmlEncode(string html)
- {
- if (html == null)
- {
- throw new ArgumentNullException("html");
- }
- // replace & by &amp; but only once!
- Regex rx = new Regex("&(?!(amp;)|(lt;)|(gt;)|(quot;))", RegexOptions.IgnoreCase);
- return rx.Replace(html, "&amp;").Replace("<", "&lt;").Replace(">", "&gt;").Replace("\"", "&quot;");
- }
-
- /// <summary>
- /// Detects the encoding of an HTML stream.
- /// </summary>
- /// <param name="stream">The input stream. May not be null.</param>
- /// <returns>The detected encoding.</returns>
- public Encoding DetectEncoding(Stream stream)
- {
- if (stream == null)
- {
- throw new ArgumentNullException("stream");
- }
- return DetectEncoding(new StreamReader(stream));
- }
-
- /// <summary>
- /// Detects the encoding of an HTML file.
- /// </summary>
- /// <param name="path">Path for the file containing the HTML document to detect. May not be null.</param>
- /// <returns>The detected encoding.</returns>
- public Encoding DetectEncoding(string path)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding);
- Encoding encoding = DetectEncoding(sr);
- sr.Close();
- return encoding;
- }
-
- /// <summary>
- /// Detects the encoding of an HTML text.
- /// </summary>
- /// <param name="html">The input html text. May not be null.</param>
- /// <returns>The detected encoding.</returns>
- public Encoding DetectEncodingHtml(string html)
- {
- if (html == null)
- {
- throw new ArgumentNullException("html");
- }
- StringReader sr = new StringReader(html);
- Encoding encoding = DetectEncoding(sr);
- sr.Close();
- return encoding;
- }
-
- /// <summary>
- /// Detects the encoding of an HTML text provided on a TextReader.
- /// </summary>
- /// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
- /// <returns>The detected encoding.</returns>
- public Encoding DetectEncoding(TextReader reader)
- {
- if (reader == null)
- {
- throw new ArgumentNullException("reader");
- }
- _onlyDetectEncoding = true;
- if (OptionCheckSyntax)
- {
- _openednodes = new Hashtable();
- }
- else
- {
- _openednodes = null;
- }
-
- if (OptionUseIdAttribute)
- {
- _nodesid = new Hashtable();
- }
- else
- {
- _nodesid = null;
- }
-
- StreamReader sr = reader as StreamReader;
- if (sr != null)
- {
- _streamencoding = sr.CurrentEncoding;
- }
- else
- {
- _streamencoding = null;
- }
- _declaredencoding = null;
-
- _text = reader.ReadToEnd();
- _documentnode = CreateNode(HtmlNodeType.Document, 0);
-
- // this is almost a hack, but it allows us not to muck with the original parsing code
- try
- {
- Parse();
- }
- catch(EncodingFoundException ex)
- {
- return ex.Encoding;
- }
- return null;
- }
-
- /// <summary>
- /// Loads an HTML document from a stream.
- /// </summary>
- /// <param name="stream">The input stream.</param>
- public void Load(Stream stream)
- {
- Load(new StreamReader(stream, OptionDefaultStreamEncoding));
- }
-
- /// <summary>
- /// Loads an HTML document from a stream.
- /// </summary>
- /// <param name="stream">The input stream.</param>
- /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
- public void Load(Stream stream, bool detectEncodingFromByteOrderMarks)
- {
- Load(new StreamReader(stream, detectEncodingFromByteOrderMarks));
- }
-
- /// <summary>
- /// Loads an HTML document from a stream.
- /// </summary>
- /// <param name="stream">The input stream.</param>
- /// <param name="encoding">The character encoding to use.</param>
- public void Load(Stream stream, Encoding encoding)
- {
- Load(new StreamReader(stream, encoding));
- }
-
- /// <summary>
- /// Loads an HTML document from a stream.
- /// </summary>
- /// <param name="stream">The input stream.</param>
- /// <param name="encoding">The character encoding to use.</param>
- /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
- public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
- {
- Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks));
- }
-
- /// <summary>
- /// Loads an HTML document from a stream.
- /// </summary>
- /// <param name="stream">The input stream.</param>
- /// <param name="encoding">The character encoding to use.</param>
- /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the stream.</param>
- /// <param name="buffersize">The minimum buffer size.</param>
- public void Load(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
- {
- Load(new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks, buffersize));
- }
-
- /// <summary>
- /// Loads an HTML document from a file.
- /// </summary>
- /// <param name="path">The complete file path to be read. May not be null.</param>
- public void Load(string path)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- StreamReader sr = new StreamReader(path, OptionDefaultStreamEncoding);
- Load(sr);
- sr.Close();
- }
-
- /// <summary>
- /// Loads an HTML document from a file.
- /// </summary>
- /// <param name="path">The complete file path to be read. May not be null.</param>
- /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
- public void Load(string path, bool detectEncodingFromByteOrderMarks)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- StreamReader sr = new StreamReader(path, detectEncodingFromByteOrderMarks);
- Load(sr);
- sr.Close();
- }
-
- /// <summary>
- /// Loads an HTML document from a file.
- /// </summary>
- /// <param name="path">The complete file path to be read. May not be null.</param>
- /// <param name="encoding">The character encoding to use. May not be null.</param>
- public void Load(string path, Encoding encoding)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- if (encoding == null)
- {
- throw new ArgumentNullException("encoding");
- }
- StreamReader sr = new StreamReader(path, encoding);
- Load(sr);
- sr.Close();
- }
-
- /// <summary>
- /// Loads an HTML document from a file.
- /// </summary>
- /// <param name="path">The complete file path to be read. May not be null.</param>
- /// <param name="encoding">The character encoding to use. May not be null.</param>
- /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
- public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- if (encoding == null)
- {
- throw new ArgumentNullException("encoding");
- }
- StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks);
- Load(sr);
- sr.Close();
- }
-
- /// <summary>
- /// Loads an HTML document from a file.
- /// </summary>
- /// <param name="path">The complete file path to be read. May not be null.</param>
- /// <param name="encoding">The character encoding to use. May not be null.</param>
- /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look for byte order marks at the beginning of the file.</param>
- /// <param name="buffersize">The minimum buffer size.</param>
- public void Load(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- if (encoding == null)
- {
- throw new ArgumentNullException("encoding");
- }
- StreamReader sr = new StreamReader(path, encoding, detectEncodingFromByteOrderMarks, buffersize);
- Load(sr);
- sr.Close();
- }
-
- /// <summary>
- /// Loads the HTML document from the specified string.
- /// </summary>
- /// <param name="html">String containing the HTML document to load. May not be null.</param>
- public void LoadHtml(string html)
- {
- if (html == null)
- {
- throw new ArgumentNullException("html");
- }
- StringReader sr = new StringReader(html);
- Load(sr);
- sr.Close();
- }
-
- /// <summary>
- /// Detects the encoding of an HTML document from a file first, and then loads the file.
- /// </summary>
- /// <param name="path">The complete file path to be read.</param>
- public void DetectEncodingAndLoad(string path)
- {
- DetectEncodingAndLoad(path, true);
- }
-
- /// <summary>
- /// Detects the encoding of an HTML document from a file first, and then loads the file.
- /// </summary>
- /// <param name="path">The complete file path to be read. May not be null.</param>
- /// <param name="detectEncoding">true to detect encoding, false otherwise.</param>
- public void DetectEncodingAndLoad(string path, bool detectEncoding)
- {
- if (path == null)
- {
- throw new ArgumentNullException("path");
- }
- System.Text.Encoding enc;
- if (detectEncoding)
- {
- enc = DetectEncoding(path);
- }
- else
- {
- enc = null;
- }
-
- if (enc == null)
- {
- Load(path);
- }
- else
- {
- Load(path, enc);
- }
- }
-
- /// <summary>
- /// Loads the HTML document from the specified TextReader.
- /// </summary>
- /// <param name="reader">The TextReader used to feed the HTML data into the document. May not be null.</param>
- public void Load(TextReader reader)
- {
- // all Load methods pass down to this one
- if (reader == null)
- {
- throw new ArgumentNullException("reader");
- }
-
- _onlyDetectEncoding = false;
-
- if (OptionCheckSyntax)
- {
- _openednodes = new Hashtable();
- }
- else
- {
- _openednodes = null;
- }
-
- if (OptionUseIdAttribute)
- {
- _nodesid = new Hashtable();
- }
- else
- {
- _nodesid = null;
- }
-
- StreamReader sr = reader as StreamReader;
- if (sr != null)
- {
- try
- {
- // trigger bom read if needed
- sr.Peek();
- }
- catch
- {
- // void on purpose
- }
- _streamencoding = sr.CurrentEncoding;
- }
- else
- {
- _streamencoding = null;
- }
- _declaredencoding = null;
-
- _text = reader.ReadToEnd();
- _documentnode = CreateNode(HtmlNodeType.Document, 0);
- Parse();
-
- if (OptionCheckSyntax)
- {
- foreach(HtmlNode node in _openednodes.Values)
- {
- if (!node._starttag) // already reported
- {
- continue;
- }
-
- string html;
- if (OptionExtractErrorSourceText)
- {
- html = node.OuterHtml;
- if (html.Length > OptionExtractErrorSourceTextMaxLength)
- {
- html = html.Substring(0, OptionExtractErrorSourceTextMaxLength);
- }
- }
- else
- {
- html = string.Empty;
- }
- AddError(
- HtmlParseErrorCode.TagNotClosed,
- node._line, node._lineposition,
- node._streamposition, html,
- "End tag </" + node.Name + "> was not found");
- }
-
- // we don't need this anymore
- _openednodes.Clear();
- }
- }
-
- internal System.Text.Encoding GetOutEncoding()
- {
- // when unspecified, use the stream encoding first
- if (_declaredencoding != null)
- {
- return _declaredencoding;
- }
- else
- {
- if (_streamencoding != null)
- {
- return _streamencoding;
- }
- }
- return OptionDefaultStreamEncoding;
- }
-
-
- /// <summary>
- /// Gets the document's output encoding.
- /// </summary>
- public System.Text.Encoding Encoding
- {
- get
- {
- return GetOutEncoding();
- }
- }
-
- /// <summary>
- /// Saves the HTML document to the specified stream.
- /// </summary>
- /// <param name="outStream">The stream to which you want to save.</param>
- public void Save(Stream outStream)
- {
- StreamWriter sw = new StreamWriter(outStream, GetOutEncoding());
- Save(sw);
- }
-
- /// <summary>
- /// Saves the HTML document to the specified stream.
- /// </summary>
- /// <param name="outStream">The stream to which you want to save. May not be null.</param>
- /// <param name="encoding">The character encoding to use. May not be null.</param>
- public void Save(Stream outStream, System.Text.Encoding encoding)
- {
- if (outStream == null)
- {
- throw new ArgumentNullException("outStream");
- }
- if (encoding == null)
- {
- throw new ArgumentNullException("encoding");
- }
- StreamWriter sw = new StreamWriter(outStream, encoding);
- Save(sw);
- }
-
- /// <summary>
- /// Saves the mixed document to the specified file.
- /// </summary>
- /// <param name="filename">The location of the file where you want to save the document.</param>
- public void Save(string filename)
- {
- StreamWriter sw = new StreamWriter(filename, false, GetOutEncoding());
- Save(sw);
- sw.Close();
- }
-
- /// <summary>
- /// Saves the mixed document to the specified file.
- /// </summary>
- /// <param name="filename">The location of the file where you want to save the document. May not be null.</param>
- /// <param name="encoding">The character encoding to use. May not be null.</param>
- public void Save(string filename, System.Text.Encoding encoding)
- {
- if (filename == null)
- {
- throw new ArgumentNullException("filename");
- }
- if (encoding == null)
- {
- throw new ArgumentNullException("encoding");
- }
- StreamWriter sw = new StreamWriter(filename, false, encoding);
- Save(sw);
- sw.Close();
- }
-
- /// <summary>
- /// Saves the HTML document to the specified StreamWriter.
- /// </summary>
- /// <param name="writer">The StreamWriter to which you want to save.</param>
- public void Save(StreamWriter writer)
- {
- Save((TextWriter)writer);
- }
-
- /// <summary>
- /// Saves the HTML document to the specified TextWriter.
- /// </summary>
- /// <param name="writer">The TextWriter to which you want to save. May not be null.</param>
- public void Save(TextWriter writer)
- {
- if (writer == null)
- {
- throw new ArgumentNullException("writer");
- }
- DocumentNode.WriteTo(writer);
- }
-
- /// <summary>
- /// Saves the HTML document to the specified XmlWriter.
- /// </summary>
- /// <param name="writer">The XmlWriter to which you want to save.</param>
- public void Save(XmlWriter writer)
- {
- DocumentNode.WriteTo(writer);
- writer.Flush();
- }
-
- /// <summary>
- /// Creates a new XPathNavigator object for navigating this HTML document.
- /// </summary>
- /// <returns>An XPathNavigator object. The XPathNavigator is positioned on the root of the document.</returns>
- public XPathNavigator CreateNavigator()
- {
- return new HtmlNodeNavigator(this, _documentnode);
- }
-
- /// <summary>
- /// Gets a valid XML name.
- /// </summary>
- /// <param name="name">Any text.</param>
- /// <returns>A string that is a valid XML name.</returns>
- public static string GetXmlName(string name)
- {
- string xmlname = string.Empty;
- bool nameisok = true;
- for(int i=0;i<name.Length;i++)
- {
- // names are lcase
- // note: we are very limited here, too much?
- if (((name[i]>='a') && (name[i]<='z')) ||
- ((name[i]>='0') && (name[i]<='9')) ||
- // (name[i]==':') || (name[i]=='_') || (name[i]=='-') || (name[i]=='.')) // these are bads in fact
- (name[i]=='_') || (name[i]=='-') || (name[i]=='.'))
- {
- xmlname += name[i];
- }
- else
- {
- nameisok = false;
- byte[] bytes = System.Text.Encoding.UTF8.GetBytes(new char[]{name[i]});
- for(int j=0;j<bytes.Length;j++)
- {
- xmlname += bytes[j].ToString("x2");
- }
- xmlname += "_";
- }
-
- }
- if (nameisok)
- {
- return xmlname;
- }
- return "_" + xmlname;
- }
-
- internal void SetIdForNode(HtmlNode node, string id)
- {
- if (!OptionUseIdAttribute)
- {
- return;
- }
-
- if ((_nodesid == null) || (id == null))
- {
- return;
- }
-
- if (node == null)
- {
- _nodesid.Remove(id.ToLower());
- }
- else
- {
- _nodesid[id.ToLower()] = node;
- }
- }
-
- /// <summary>
- /// Gets the HTML node with the specified 'id' attribute value.
- /// </summary>
- /// <param name="id">The attribute id to match. May not be null.</param>
- /// <returns>The HTML node with the matching id or null if not found.</returns>
- public HtmlNode GetElementbyId(string id)
- {
- if (id == null)
- {
- throw new ArgumentNullException("id");
- }
- if (_nodesid == null)
- {
- throw new Exception(HtmlExceptionUseIdAttributeFalse);
- }
-
- return _nodesid[id.ToLower()] as HtmlNode;
- }
-
- /// <summary>
- /// Creates an HTML element node with the specified name.
- /// </summary>
- /// <param name="name">The qualified name of the element. May not be null.</param>
- /// <returns>The new HTML node.</returns>
- public HtmlNode CreateElement(string name)
- {
- if (name == null)
- {
- throw new ArgumentNullException("name");
- }
- HtmlNode node = CreateNode(HtmlNodeType.Element);
- node._name = name;
- return node;
- }
-
- /// <summary>
- /// Creates an HTML comment node.
- /// </summary>
- /// <returns>The new HTML comment node.</returns>
- public HtmlCommentNode CreateComment()
- {
- return (HtmlCommentNode)CreateNode(HtmlNodeType.Comment);
- }
-
- /// <summary>
- /// Creates an HTML comment node with the specified comment text.
- /// </summary>
- /// <param name="comment">The comment text. May not be null.</param>
- /// <returns>The new HTML comment node.</returns>
- public HtmlCommentNode CreateComment(string comment)
- {
- if (comment == null)
- {
- throw new ArgumentNullException("comment");
- }
- HtmlCommentNode c = CreateComment();
- c.Comment = comment;
- return c;
- }
-
- /// <summary>
- /// Creates an HTML text node.
- /// </summary>
- /// <returns>The new HTML text node.</returns>
- public HtmlTextNode CreateTextNode()
- {
- return (HtmlTextNode)CreateNode(HtmlNodeType.Text);
- }
-
- /// <summary>
- /// Creates an HTML text node with the specified text.
- /// </summary>
- /// <param name="text">The text of the node. May not be null.</param>
- /// <returns>The new HTML text node.</returns>
- public HtmlTextNode CreateTextNode(string text)
- {
- if (text == null)
- {
- throw new ArgumentNullException("text");
- }
- HtmlTextNode t = CreateTextNode();
- t.Text = text;
- return t;
- }
-
- internal HtmlNode CreateNode(HtmlNodeType type)
- {
- return CreateNode(type, -1);
- }
-
- internal HtmlNode CreateNode(HtmlNodeType type, int index)
- {
- switch (type)
- {
- case HtmlNodeType.Comment:
- return new HtmlCommentNode(this, index);
-
- case HtmlNodeType.Text:
- return new HtmlTextNode(this, index);
-
- default:
- return new HtmlNode(type, this, index);
- }
- }
-
- internal HtmlAttribute CreateAttribute()
- {
- return new HtmlAttribute(this);
- }
-
- /// <summary>
- /// Creates an HTML attribute with the specified name.
- /// </summary>
- /// <param name="name">The name of the attribute. May not be null.</param>
- /// <returns>The new HTML attribute.</returns>
- public HtmlAttribute CreateAttribute(string name)
- {
- if (name == null)
- {
- throw new ArgumentNullException("name");
- }
- HtmlAttribute att = CreateAttribute();
- att.Name = name;
- return att;
- }
-
- /// <summary>
- /// Creates an HTML attribute with the specified name.
- /// </summary>
- /// <param name="name">The name of the attribute. May not be null.</param>
- /// <param name="value">The value of the attribute.</param>
- /// <returns>The new HTML attribute.</returns>
- public HtmlAttribute CreateAttribute(string name, string value)
- {
- if (name == null)
- {
- throw new ArgumentNullException("name");
- }
- HtmlAttribute att = CreateAttribute(name);
- att.Value = value;
- return att;
- }
-
- /// <summary>
- /// Gets the root node of the document.
- /// </summary>
- public HtmlNode DocumentNode
- {
- get
- {
- return _documentnode;
- }
- }
-
- /// <summary>
- /// Gets the document CRC32 checksum if OptionComputeChecksum was set to true before parsing, 0 otherwise.
- /// </summary>
- public int CheckSum
- {
- get
- {
- if (_crc32 == null)
- {
- return 0;
- }
- else
- {
- return (int)_crc32.CheckSum;
- }
- }
- }
-
- private HtmlParseError AddError(
- HtmlParseErrorCode code,
- int line,
- int linePosition,
- int streamPosition,
- string sourceText,
- string reason)
- {
- HtmlParseError err = new HtmlParseError(code, line, linePosition, streamPosition, sourceText, reason);
- _parseerrors.Add(err);
- return err;
- }
-
-
- private enum ParseState
- {
- Text,
- WhichTag,
- Tag,
- BetweenAttributes,
- EmptyTag,
- AttributeName,
- AttributeBeforeEquals,
- AttributeAfterEquals,
- AttributeValue,
- Comment,
- QuotedAttributeValue,
- ServerSideCode,
- PcData
- }
-
- private void IncrementPosition()
- {
- if (_crc32 != null)
- {
- // REVIEW: should we add some checksum code in DecrementPosition too?
- _crc32.AddToCRC32(_c);
- }
-
- _index++;
- _maxlineposition = _lineposition;
- if (_c == 10)
- {
- _lineposition = 1;
- _line++;
- }
- else
- {
- _lineposition++;
- }
- }
-
- private void DecrementPosition()
- {
- _index--;
- if (_lineposition == 1)
- {
- _lineposition = _maxlineposition;
- _line--;
- }
- else
- {
- _lineposition--;
- }
- }
-
- private void Parse()
- {
- int lastquote = 0;
- if (OptionComputeChecksum)
- {
- _crc32 = new Crc32();
- }
-
- _lastnodes = new Hashtable();
- _c = 0;
- _fullcomment = false;
- _parseerrors = new ArrayList();
- _line = 1;
- _lineposition = 1;
- _maxlineposition = 1;
-
- _state = ParseState.Text;
- _oldstate = _state;
- _documentnode._innerlength = _text.Length;
- _documentnode._outerlength = _text.Length;
- _remainderOffset = _text.Length;
-
- _lastparentnode = _documentnode;
- _currentnode = CreateNode(HtmlNodeType.Text, 0);
- _currentattribute = null;
-
- _index = 0;
- PushNodeStart(HtmlNodeType.Text, 0);
- while (_index<_text.Length)
- {
- _c = _text[_index];
- IncrementPosition();
-
- switch(_state)
- {
- case ParseState.Text:
- if (NewCheck())
- continue;
- break;
-
- case ParseState.WhichTag:
- if (NewCheck())
- continue;
- if (_c == '/')
- {
- PushNodeNameStart(false, _index);
- }
- else
- {
- PushNodeNameStart(true, _index-1);
- DecrementPosition();
- }
- _state = ParseState.Tag;
- break;
-
- case ParseState.Tag:
- if (NewCheck())
- continue;
- if (IsWhiteSpace(_c))
- {
- PushNodeNameEnd(_index-1);
- if (_state != ParseState.Tag)
- continue;
- _state = ParseState.BetweenAttributes;
- continue;
- }
- if (_c == '/')
- {
- PushNodeNameEnd(_index-1);
- if (_state != ParseState.Tag)
- continue;
- _state = ParseState.EmptyTag;
- continue;
- }
- if (_c == '>')
- {
- PushNodeNameEnd(_index-1);
- if (_state != ParseState.Tag)
- continue;
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
- if (_state != ParseState.Tag)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- }
- break;
-
- case ParseState.BetweenAttributes:
- if (NewCheck())
- continue;
-
- if (IsWhiteSpace(_c))
- continue;
-
- if ((_c == '/') || (_c == '?'))
- {
- _state = ParseState.EmptyTag;
- continue;
- }
-
- if (_c == '>')
- {
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
-
- if (_state != ParseState.BetweenAttributes)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
-
- PushAttributeNameStart(_index-1);
- _state = ParseState.AttributeName;
- break;
-
- case ParseState.EmptyTag:
- if (NewCheck())
- continue;
-
- if (_c == '>')
- {
- if (!PushNodeEnd(_index, true))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
-
- if (_state != ParseState.EmptyTag)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
- _state = ParseState.BetweenAttributes;
- break;
-
- case ParseState.AttributeName:
- if (NewCheck())
- continue;
-
- if (IsWhiteSpace(_c))
- {
- PushAttributeNameEnd(_index-1);
- _state = ParseState.AttributeBeforeEquals;
- continue;
- }
- if (_c == '=')
- {
- PushAttributeNameEnd(_index-1);
- _state = ParseState.AttributeAfterEquals;
- continue;
- }
- if (_c == '>')
- {
- PushAttributeNameEnd(_index-1);
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
- if (_state != ParseState.AttributeName)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
- break;
-
- case ParseState.AttributeBeforeEquals:
- if (NewCheck())
- continue;
-
- if (IsWhiteSpace(_c))
- continue;
- if (_c == '>')
- {
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
- if (_state != ParseState.AttributeBeforeEquals)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
- if (_c == '=')
- {
- _state = ParseState.AttributeAfterEquals;
- continue;
- }
- // no equals, no whitespace, it's a new attribute starting
- _state = ParseState.BetweenAttributes;
- DecrementPosition();
- break;
-
- case ParseState.AttributeAfterEquals:
- if (NewCheck())
- continue;
-
- if (IsWhiteSpace(_c))
- continue;
-
- if ((_c == '\'') || (_c == '"'))
- {
- _state = ParseState.QuotedAttributeValue;
- PushAttributeValueStart(_index);
- lastquote = _c;
- continue;
- }
- if (_c == '>')
- {
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
- if (_state != ParseState.AttributeAfterEquals)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
- PushAttributeValueStart(_index-1);
- _state = ParseState.AttributeValue;
- break;
-
- case ParseState.AttributeValue:
- if (NewCheck())
- continue;
-
- if (IsWhiteSpace(_c))
- {
- PushAttributeValueEnd(_index-1);
- _state = ParseState.BetweenAttributes;
- continue;
- }
-
- if (_c == '>')
- {
- PushAttributeValueEnd(_index-1);
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
- if (_state != ParseState.AttributeValue)
- continue;
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
- break;
-
- case ParseState.QuotedAttributeValue:
- if (_c == lastquote)
- {
- PushAttributeValueEnd(_index-1);
- _state = ParseState.BetweenAttributes;
- continue;
- }
- if (_c == '<')
- {
- if (_index<_text.Length)
- {
- if (_text[_index] == '%')
- {
- _oldstate = _state;
- _state = ParseState.ServerSideCode;
- continue;
- }
- }
- }
- break;
-
- case ParseState.Comment:
- if (_c == '>')
- {
- if (_fullcomment)
- {
- if ((_text[_index-2] != '-') ||
- (_text[_index-3] != '-'))
- {
- continue;
- }
- }
- if (!PushNodeEnd(_index, false))
- {
- // stop parsing
- _index = _text.Length;
- break;
- }
- _state = ParseState.Text;
- PushNodeStart(HtmlNodeType.Text, _index);
- continue;
- }
- break;
-
- case ParseState.ServerSideCode:
- if (_c == '%')
- {
- if (_index<_text.Length)
- {
- if (_text[_index] == '>')
- {
- switch(_oldstate)
- {
- case ParseState.AttributeAfterEquals:
- _state = ParseState.AttributeValue;
- break;
-
- case ParseState.BetweenAttributes:
- PushAttributeNameEnd(_index+1);
- _state = ParseState.BetweenAttributes;
- break;
-
- default:
- _state = _oldstate;
- break;
- }
- IncrementPosition();
- }
- }
- }
- break;
-
- case ParseState.PcData:
- // look for </tag + 1 char
-
- // check buffer end
- if ((_currentnode._namelength+3)<=(_text.Length-(_index-1)))
- {
- if (string.Compare(_text.Substring(_index-1, _currentnode._namelength+2),
- "</" + _currentnode.Name, true) == 0)
- {
- int c = _text[_index-1 + 2 + _currentnode.Name.Length];
- if ((c == '>') || (IsWhiteSpace(c)))
- {
- // add the script as a text node
- HtmlNode script = CreateNode(HtmlNodeType.Text,
- _currentnode._outerstartindex + _currentnode._outerlength);
- script._outerlength = _index-1 - script._outerstartindex;
- _currentnode.AppendChild(script);
-
-
- PushNodeStart(HtmlNodeType.Element, _index-1);
- PushNodeNameStart(false, _index-1 +2);
- _state = ParseState.Tag;
- IncrementPosition();
- }
- }
- }
- break;
- }
- }
-
- // finish the current work
- if (_currentnode._namestartindex > 0)
- {
- PushNodeNameEnd(_index);
- }
- PushNodeEnd(_index, false);
-
- // we don't need this anymore
- _lastnodes.Clear();
- }
-
- /// <summary>
- /// Reacts to a '&lt;' held in <c>_c</c>: switches to server-side code on "&lt;%",
- /// otherwise ends the node in progress and opens a comment ("&lt;!") or an element node.
- /// </summary>
- /// <returns>
- /// false when <c>_c</c> is not '&lt;' (nothing is done); true when the character was
- /// handled here, including the case where parsing has been stopped by moving
- /// <c>_index</c> to the end of the text.
- /// </returns>
- private bool NewCheck()
- {
-     if (_c != '<')
-     {
-         return false;
-     }
-
-     // _text[_index] is read here as the character that follows the '<' held in _c
-     if ((_index < _text.Length) && (_text[_index] == '%'))
-     {
-         // "<%": open whatever the current state expects, then remember where to come back to
-         switch(_state)
-         {
-             case ParseState.AttributeAfterEquals:
-                 PushAttributeValueStart(_index-1);
-                 break;
-
-             case ParseState.BetweenAttributes:
-                 PushAttributeNameStart(_index-1);
-                 break;
-
-             case ParseState.WhichTag:
-                 PushNodeNameStart(true, _index-1);
-                 _state = ParseState.Tag;
-                 break;
-         }
-         _oldstate = _state;
-         _state = ParseState.ServerSideCode;
-         return true;
-     }
-
-     // a new tag starts: the node in progress ends just before the '<'
-     if (!PushNodeEnd(_index-1, true))
-     {
-         // stop parsing
-         _index = _text.Length;
-         return true;
-     }
-     _state = ParseState.WhichTag;
-
-     if ((_index < _text.Length) && (_text[_index] == '!'))
-     {
-         PushNodeStart(HtmlNodeType.Comment, _index-1);
-         PushNodeNameStart(true, _index);
-         PushNodeNameEnd(_index+1);
-         _state = ParseState.Comment;
-
-         // Always recompute the flag. Previously it was left untouched when fewer than
-         // two characters followed the '!', so a truncated "<!" at the end of the text
-         // inherited the value computed for an earlier comment.
-         _fullcomment = (_index < (_text.Length-2)) &&
-             (_text[_index+1] == '-') &&
-             (_text[_index+2] == '-');
-         return true;
-     }
-
-     PushNodeStart(HtmlNodeType.Element, _index-1);
-     return true;
- }
-
- /// <summary>
- /// Looks for a charset declaration on a META node of the form
- /// &lt;meta http-equiv="content-type" content="text/html;charset=iso-8859-1" /&gt;
- /// and records it as the declared encoding. Does nothing when OptionReadEncoding is off.
- /// </summary>
- /// <param name="node">The node that has just been ended; its attributes are read here.</param>
- /// <exception cref="EncodingFoundException">
- /// Thrown with the declared encoding when <c>_onlyDetectEncoding</c> is set.
- /// </exception>
- private void ReadDocumentEncoding(HtmlNode node)
- {
-     if (!OptionReadEncoding)
-         return;
-
-     // quick length check first, avoids a string alloc for most nodes
-     if (node._namelength != 4)
-         return;
-
-     if (node.Name != "meta") // all nodes names are lowercase
-         return;
-
-     HtmlAttribute att = node.Attributes["http-equiv"];
-     if (att == null)
-         return;
-
-     if (string.Compare(att.Value, "content-type", true) != 0)
-         return;
-
-     HtmlAttribute content = node.Attributes["content"];
-     if (content == null)
-         return;
-
-     string charset = NameValuePairList.GetNameValuePairsValue(content.Value, "charset");
-     if (charset == null)
-         return;
-
-     // A document may declare a charset name the platform does not know.
-     // Encoding.GetEncoding throws in that case; treat it as "no usable declaration"
-     // instead of aborting the whole parse.
-     Encoding declared;
-     try
-     {
-         declared = Encoding.GetEncoding(charset);
-     }
-     catch (ArgumentException)
-     {
-         return;
-     }
-     catch (NotSupportedException)
-     {
-         return;
-     }
-
-     _declaredencoding = declared;
-     if (_onlyDetectEncoding)
-     {
-         throw new EncodingFoundException(_declaredencoding);
-     }
-
-     if (_streamencoding == null)
-         return;
-
-     if (_declaredencoding.WindowsCodePage != _streamencoding.WindowsCodePage)
-     {
-         AddError(
-             HtmlParseErrorCode.CharsetMismatch,
-             _line, _lineposition,
-             _index, node.OuterHtml,
-             "Encoding mismatch between StreamEncoding: " +
-             _streamencoding.WebName + " and DeclaredEncoding: " + _declaredencoding.WebName);
-     }
- }
-
- /// <summary>
- /// Starts a new current attribute whose name begins at <paramref name="index"/>,
- /// stamping it with the parser's current line and line position.
- /// </summary>
- /// <param name="index">Index in the text where the attribute name starts.</param>
- private void PushAttributeNameStart(int index)
- {
-     _currentattribute = CreateAttribute();
-
-     // location information
-     _currentattribute._line = _line;
-     _currentattribute._lineposition = _lineposition;
-     _currentattribute._streamposition = index;
-
-     // name span start
-     _currentattribute._namestartindex = index;
- }
-
- /// <summary>
- /// Fixes the length of the current attribute's name and appends the attribute
- /// to the current node.
- /// </summary>
- /// <param name="index">Index in the text just past the attribute name.</param>
- private void PushAttributeNameEnd(int index)
- {
-     int nameLength = index - _currentattribute._namestartindex;
-     _currentattribute._namelength = nameLength;
-
-     _currentnode.Attributes.Append(_currentattribute);
- }
-
- /// <summary>
- /// Records where the current attribute's value starts.
- /// </summary>
- /// <param name="index">Index in the text where the value starts.</param>
- private void PushAttributeValueStart(int index)
- {
-     int valueStart = index;
-     _currentattribute._valuestartindex = valueStart;
- }
-
- /// <summary>
- /// Fixes the length of the current attribute's value, measured from the recorded value start.
- /// </summary>
- /// <param name="index">Index in the text just past the value.</param>
- private void PushAttributeValueEnd(int index)
- {
-     int valueLength = index - _currentattribute._valuestartindex;
-     _currentattribute._valuelength = valueLength;
- }
-
- /// <summary>
- /// Creates a node of the given type starting at <paramref name="index"/> and makes it
- /// the current node, stamped with the parser's current line and line position.
- /// </summary>
- /// <param name="type">Type of node to create.</param>
- /// <param name="index">Index in the text where the node starts.</param>
- private void PushNodeStart(HtmlNodeType type, int index)
- {
-     _currentnode = CreateNode(type, index);
-     _currentnode._line = _line;
-     _currentnode._streamposition = index;
-
-     // element nodes are reported one column earlier than the current line position
-     _currentnode._lineposition = (type == HtmlNodeType.Element)
-         ? _lineposition - 1
-         : _lineposition;
- }
-
- private bool PushNodeEnd(int index, bool close) // Ends _currentnode at 'index'; 'close' asks to close it as well. Returns false only when the stopper node was reached (caller stops parsing), true otherwise.
- {
- _currentnode._outerlength = index - _currentnode._outerstartindex; // outer span runs from the node start up to 'index'
-
- if ((_currentnode._nodetype == HtmlNodeType.Text) ||
- (_currentnode._nodetype == HtmlNodeType.Comment))
- {
- // forget about void nodes
- if (_currentnode._outerlength>0) // zero-length text/comment nodes are never attached to the tree
- {
- _currentnode._innerlength = _currentnode._outerlength; // for text and comments the inner span equals the outer span
- _currentnode._innerstartindex = _currentnode._outerstartindex;
- if (_lastparentnode != null)
- {
- _lastparentnode.AppendChild(_currentnode);
- }
- }
- }
- else
- {
- if ((_currentnode._starttag) && (_lastparentnode != _currentnode)) // only start tags that are not already the current parent are attached
- {
- // add to parent node
- if (_lastparentnode != null)
- {
- _lastparentnode.AppendChild(_currentnode);
- }
-
- ReadDocumentEncoding(_currentnode); // attributes are populated at this point, so a META charset can be read
-
- // remember last node of this kind
- HtmlNode prev = (HtmlNode)_lastnodes[_currentnode.Name];
- _currentnode._prevwithsamename = prev; // chain to the previous node with the same name before replacing the table entry
- _lastnodes[_currentnode.Name] = _currentnode;
-
- // change parent?
- if ((_currentnode.NodeType == HtmlNodeType.Document) ||
- (_currentnode.NodeType == HtmlNodeType.Element))
- {
- _lastparentnode = _currentnode; // following nodes are appended under this one
- }
-
- if (HtmlNode.IsCDataElement(CurrentNodeName()))
- {
- _state = ParseState.PcData; // content is scanned in the PcData state; note this returns before any close handling below
- return true;
- }
-
- if ((HtmlNode.IsClosedElement(_currentnode.Name)) ||
- (HtmlNode.IsEmptyElement(_currentnode.Name)))
- {
- close = true; // these elements are closed right away, whatever the caller asked for
- }
- }
- }
-
- if ((close) || (!_currentnode._starttag)) // closing was requested/forced, or this is an end tag
- {
- if ((OptionStopperNodeName != null) && (_remainder == null) &&
- (string.Compare(_currentnode.Name, OptionStopperNodeName, true) == 0)) // case-insensitive match on the stopper name, honoured only while no remainder has been recorded
- {
- _remainderOffset = index;
- _remainder = _text.Substring(_remainderOffset); // everything from 'index' on is kept as the unparsed remainder
- CloseCurrentNode();
- return false; // stop parsing
- }
- CloseCurrentNode();
- }
- return true;
- }
-
- /// <summary>
- /// Records where the current node's name starts and whether the node is a start tag.
- /// </summary>
- /// <param name="starttag">true for a start tag, false for an end tag.</param>
- /// <param name="index">Index in the text where the name starts.</param>
- private void PushNodeNameStart(bool starttag, int index)
- {
-     _currentnode._namestartindex = index;
-     _currentnode._starttag = starttag;
- }
-
- /// <summary>
- /// Gets the names of the container elements ("resetters") associated with an element name.
- /// </summary>
- /// <param name="name">Element name; compared as-is against "li", "tr", "th" and "td".</param>
- /// <returns>A new array of resetter names, or null when the name has none.</returns>
- private string[] GetResetters(string name)
- {
-     if (name == "li")
-     {
-         return new string[]{"ul"};
-     }
-
-     if (name == "tr")
-     {
-         return new string[]{"table"};
-     }
-
-     if ((name == "th") || (name == "td"))
-     {
-         return new string[]{"tr", "table"};
-     }
-
-     return null;
- }
-
- /// <summary>
- /// For a start tag, closes a previous unclosed node of the same name when the
- /// nesting rules require it. End tags are ignored.
- /// </summary>
- private void FixNestedTags()
- {
-     // we are only interested by start tags, not closing tags
-     if (!_currentnode._starttag)
-         return;
-
-     // Lower-case with the invariant culture: the result is matched against fixed
-     // ASCII names ("li", "tr", ...), and a culture-sensitive ToLower() maps "LI"
-     // to "lı" under e.g. the Turkish culture, which would silently skip the fix.
-     string name = CurrentNodeName().ToLower(System.Globalization.CultureInfo.InvariantCulture);
-     FixNestedTag(name, GetResetters(name));
- }
-
- /// <summary>
- /// Closes the last unclosed node named <paramref name="name"/> unless one of the
- /// resetter nodes is found for it.
- /// </summary>
- /// <param name="name">Lower-cased name of the node being opened.</param>
- /// <param name="resetters">Resetter names for that node; null means there is nothing to fix.</param>
- private void FixNestedTag(string name, string[] resetters)
- {
-     if (resetters == null)
-     {
-         return;
-     }
-
-     // last node seen with the same name; nothing to do if there is none or it is closed
-     HtmlNode previous = (HtmlNode)_lastnodes[name];
-     if ((previous == null) || previous.Closed)
-     {
-         return;
-     }
-
-     // a resetter node was found: the nesting is legitimate, leave it alone
-     if (FindResetterNodes(previous, resetters))
-     {
-         return;
-     }
-
-     // otherwise close the previous node with a fake closer node that is its own end node
-     HtmlNode fakeCloser = new HtmlNode(previous.NodeType, this, -1);
-     fakeCloser._endnode = fakeCloser;
-     previous.CloseNode(fakeCloser);
- }
-
- /// <summary>
- /// Tells whether a resetter node exists for <paramref name="node"/> under any of the given names.
- /// </summary>
- /// <param name="node">The node the resetters are checked against.</param>
- /// <param name="names">Candidate resetter names; may be null.</param>
- /// <returns>true as soon as one name yields a resetter node; false otherwise or when names is null.</returns>
- private bool FindResetterNodes(HtmlNode node, string[] names)
- {
-     if (names != null)
-     {
-         foreach (string candidate in names)
-         {
-             if (FindResetterNode(node, candidate) != null)
-             {
-                 return true;
-             }
-         }
-     }
-     return false;
- }
-
- /// <summary>
- /// Gets the last node recorded under <paramref name="name"/> if it can act as a
- /// resetter for <paramref name="node"/>.
- /// </summary>
- /// <param name="node">The node the resetter is checked against.</param>
- /// <param name="name">Name of the resetter element to look up.</param>
- /// <returns>
- /// The recorded node when it exists, is not closed and its stream position is not
- /// before that of <paramref name="node"/>; otherwise null.
- /// </returns>
- private HtmlNode FindResetterNode(HtmlNode node, string name)
- {
-     HtmlNode candidate = (HtmlNode)_lastnodes[name];
-
-     bool unusable = (candidate == null) ||
-         candidate.Closed ||
-         (candidate._streamposition < node._streamposition);
-
-     return unusable ? null : candidate;
- }
-
- /// <summary>
- /// Fixes the length of the current node's name, then applies the nested-tag fix
- /// when OptionFixNestedTags is enabled.
- /// </summary>
- /// <param name="index">Index in the text just past the node name.</param>
- private void PushNodeNameEnd(int index)
- {
-     int nameLength = index - _currentnode._namestartindex;
-     _currentnode._namelength = nameLength;
-
-     if (!OptionFixNestedTags)
-     {
-         return;
-     }
-     FixNestedTags();
- }
-
- private void CloseCurrentNode() // Closes _currentnode: pairs it with the last node of the same name, or handles an unmatched tag, then moves _lastparentnode up unless an error was flagged.
- {
- if (_currentnode.Closed) // text or document are by def closed
- return;
-
- bool error = false; // set by the TagNotOpened and EndTagInvalidHere cases; suppresses the parent update at the end
-
- // find last node of this kind
- HtmlNode prev = (HtmlNode)_lastnodes[_currentnode.Name];
- if (prev == null) // no node with this name is recorded in _lastnodes
- {
- if (HtmlNode.IsClosedElement(_currentnode.Name))
- {
- // </br> will be seen as <br>
- _currentnode.CloseNode(_currentnode); // the node is closed by itself
-
- // add to parent node
- if (_lastparentnode != null)
- {
- HtmlNode foundNode = null;
- Stack futureChild = new Stack(); // siblings walked over, collected from last to first
- for (HtmlNode node = _lastparentnode.LastChild; node != null; node = node.PreviousSibling) // scan the parent's children backwards
- {
- if ((node.Name == _currentnode.Name) && (! node.HasChildNodes)) // a childless sibling with the same name
- {
- foundNode = node;
- break;
- }
- futureChild.Push(node);
- }
- if (foundNode != null)
- {
- HtmlNode node = null;
- while(futureChild.Count != 0) // popping reverses the push order, so nodes are re-appended in their original order
- {
- node = (HtmlNode)futureChild.Pop();
- _lastparentnode.RemoveChild(node); // move the sibling from the parent ...
- foundNode.AppendChild(node); // ... to under the found node
- }
- }
- else
- {
- _lastparentnode.AppendChild(_currentnode); // no such sibling: attach the node itself to the parent
- }
-
- }
- }
- else
- {
- // node has no parent
- // node is not a closed node
-
- if (HtmlNode.CanOverlapElement(_currentnode.Name))
- {
- // this is a hack: add it as a text node
- HtmlNode closenode = CreateNode(HtmlNodeType.Text, _currentnode._outerstartindex); // text node covering the same span as the tag
- closenode._outerlength = _currentnode._outerlength;
- ((HtmlTextNode)closenode).Text = ((HtmlTextNode)closenode).Text.ToLower(); // the kept text is lower-cased
- if (_lastparentnode != null)
- {
- _lastparentnode.AppendChild(closenode);
- }
-
- }
- else
- {
- if (HtmlNode.IsEmptyElement(_currentnode.Name))
- {
- AddError( // reported, but 'error' stays false in this case
- HtmlParseErrorCode.EndTagNotRequired,
- _currentnode._line, _currentnode._lineposition,
- _currentnode._streamposition, _currentnode.OuterHtml,
- "End tag </" + _currentnode.Name + "> is not required");
- }
- else
- {
- // node cannot overlap, node is not empty
- AddError(
- HtmlParseErrorCode.TagNotOpened,
- _currentnode._line, _currentnode._lineposition,
- _currentnode._streamposition, _currentnode.OuterHtml,
- "Start tag <" + _currentnode.Name + "> was not found");
- error = true;
- }
- }
- }
- }
- else
- {
- if (OptionFixNestedTags)
- {
- if (FindResetterNodes(prev, GetResetters(_currentnode.Name))) // a resetter node was found for prev: the end tag is rejected
- {
- AddError(
- HtmlParseErrorCode.EndTagInvalidHere,
- _currentnode._line, _currentnode._lineposition,
- _currentnode._streamposition, _currentnode.OuterHtml,
- "End tag </" + _currentnode.Name + "> invalid here");
- error = true;
- }
- }
-
- if (!error)
- {
- _lastnodes[_currentnode.Name] = prev._prevwithsamename; // the table entry falls back to the node recorded before prev
- prev.CloseNode(_currentnode); // the current node is passed to prev as its closing node
- }
- }
-
-
- // we close this node, get grandparent
- if (!error)
- {
- if ((_lastparentnode != null) &&
- ((!HtmlNode.IsClosedElement(_currentnode.Name)) ||
- (_currentnode._starttag))) // skipped for an end tag of a closed element such as </br>
- {
- UpdateLastParentNode();
- }
- }
- }
-
- /// <summary>
- /// Moves <c>_lastparentnode</c> up to its nearest ancestor-or-self that is not closed,
- /// falling back to the document node when there is none.
- /// </summary>
- internal void UpdateLastParentNode()
- {
-     // The null test comes first so that a call made while no parent is set falls
-     // through to the document node instead of dereferencing null.
-     while ((_lastparentnode != null) && (_lastparentnode.Closed))
-     {
-         _lastparentnode = _lastparentnode.ParentNode;
-     }
-
-     if (_lastparentnode == null)
-     {
-         _lastparentnode = _documentnode;
-     }
- }
-
- /// <summary>
- /// Gets the current attribute's name, cut out of the parsed text.
- /// </summary>
- private string CurrentAttributeName()
- {
-     int start = _currentattribute._namestartindex;
-     int length = _currentattribute._namelength;
-     return _text.Substring(start, length);
- }
-
- /// <summary>
- /// Gets the current attribute's value, cut out of the parsed text.
- /// </summary>
- private string CurrentAttributeValue()
- {
-     int start = _currentattribute._valuestartindex;
-     int length = _currentattribute._valuelength;
-     return _text.Substring(start, length);
- }
-
- /// <summary>
- /// Gets the current node's name, cut out of the parsed text (case is left as written).
- /// </summary>
- private string CurrentNodeName()
- {
-     int start = _currentnode._namestartindex;
-     int length = _currentnode._namelength;
-     return _text.Substring(start, length);
- }
-
- /// <summary>
- /// Gets the text covered by the current node's outer span.
- /// </summary>
- private string CurrentNodeOuter()
- {
-     int start = _currentnode._outerstartindex;
-     int length = _currentnode._outerlength;
-     return _text.Substring(start, length);
- }
-
- /// <summary>
- /// Gets the text covered by the current node's inner span.
- /// </summary>
- private string CurrentNodeInner()
- {
-     int start = _currentnode._innerstartindex;
-     int length = _currentnode._innerlength;
-     return _text.Substring(start, length);
- }
-
- /// <summary>
- /// Determines whether the specified character code is one the parser treats as whitespace:
- /// tab (9), line feed (10), carriage return (13) or space (32).
- /// </summary>
- /// <param name="c">The character code to check.</param>
- /// <returns>true if the specified character is considered a whitespace character; otherwise false.</returns>
- public static bool IsWhiteSpace(int c)
- {
-     switch (c)
-     {
-         case 9:
-         case 10:
-         case 13:
-         case 32:
-             return true;
-
-         default:
-             return false;
-     }
- }
-
- }
-
- /// <summary>
- /// Exception that carries an encoding out of the parser; it is thrown when only
- /// encoding detection was requested and a declared encoding has been read.
- /// </summary>
- internal class EncodingFoundException: Exception
- {
-     // the encoding handed to the constructor
-     private Encoding _found;
-
-     /// <summary>
-     /// Creates the exception for the given encoding.
-     /// </summary>
-     /// <param name="encoding">The encoding to carry.</param>
-     internal EncodingFoundException(Encoding encoding)
-     {
-         _found = encoding;
-     }
-
-     /// <summary>
-     /// Gets the encoding passed to the constructor.
-     /// </summary>
-     internal Encoding Encoding
-     {
-         get { return _found; }
-     }
- }
-}