Initial commit (version 0.5 of May 2006)

author: Jonas John <jonas@jonasjohn.de> 2012-02-09 13:42:59 +0400
committer: Jonas John <jonas@jonasjohn.de> 2012-02-09 13:42:59 +0400
commit: a44eee9e97a7851596e26de2c9e53fa569df7389 (patch)
tree: 27904364de31aad4ddfab8bc006261fc6a382382
19 files changed, 2884 insertions, 0 deletions
diff --git a/examples/demo_01.php b/examples/demo_01.php
new file mode 100755
index 0000000..a168b82
--- /dev/null
+++ b/examples/demo_01.php
@@ -0,0 +1,46 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 1
+    **
+    ** Shows a simple query
+    */
+    
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+        
+       This query extracts all links with the classname = nav_item   
+    */
+    if (!$wsql->query('SELECT * FROM a WHERE $class == "nav_item"')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // show results:
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+        /* 
+        $row is an array and looks like this:
+        Array (
+            [href] => /feedback.htm
+            [class] => nav_item
+            [tagname] => a
+            [text] => Feedback
+        )
+        */
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_02.php b/examples/demo_02.php
new file mode 100755
index 0000000..b660af1
--- /dev/null
+++ b/examples/demo_02.php
@@ -0,0 +1,38 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 2
+    **
+    ** Shows a simple query and the "href as url" usage
+    */
+    
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a file
+    if (!$wsql->connect('file', 'demo_data.htm')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query extracts all links from the document
+       and just returns href (as url) and text
+    */
+    if (!$wsql->query('SELECT href as url, text FROM a')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // show results:
+    foreach($wsql->fetch_array() as $row){
+    
+        print "Link-URL: " . $row['url'] . "\n";
+        print "Link-Text: " . trim($row['text']) . "\n\n";
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_03.php b/examples/demo_03.php
new file mode 100755
index 0000000..dc05e19
--- /dev/null
+++ b/examples/demo_03.php
@@ -0,0 +1,37 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 3
+    **
+    ** Shows how to connect to a file and a simple query
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a file
+    if (!$wsql->connect('file', 'demo_data.htm')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query searches in all tags for the id == header and returns
+       the tag
+    */
+    if (!$wsql->query('SELECT * FROM * WHERE $id == "header"')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // show results:
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_04.php b/examples/demo_04.php
new file mode 100755
index 0000000..5f7df78
--- /dev/null
+++ b/examples/demo_04.php
@@ -0,0 +1,36 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 4
+    **
+    ** Shows an advanced query with preg_match
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/links.htm')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query returns all links of a document that start with http://
+    */
+    if (!$wsql->query('SELECT * FROM a WHERE preg_match("/^http:\/\//", $href)')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // show results:
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_05.php b/examples/demo_05.php
new file mode 100755
index 0000000..48ea46f
--- /dev/null
+++ b/examples/demo_05.php
@@ -0,0 +1,38 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 5
+    **
+    ** Shows an advanced query (with substr)
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/links.htm')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query returns all links of a document that do not start with /
+       ( / = internal links)
+    */
+    if (!$wsql->query('SELECT * FROM a WHERE substr($href,0,1) != "/"')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as object and format as HTML links:
+    foreach($wsql->fetch_objects() as $obj){
+    
+        print '<a href="'.$obj->href.'">'.$obj->text.'</a><br/>';
+        print "\n";
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_06.php b/examples/demo_06.php
new file mode 100755
index 0000000..0e2a2c2
--- /dev/null
+++ b/examples/demo_06.php
@@ -0,0 +1,40 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 6
+    **
+    ** Show how to connect to a string
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    
+    $some_html = '<a href="link1.htm">link1</a> <b>foobar</b> ';
+    $some_html .= '<a href="link2.htm">link2</a> <hr/>';
+    
+    $wsql = new htmlsql();
+    
+    // connect to a string
+    if (!$wsql->connect('string', $some_html)){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query returns all links of the given HTML
+    */
+    if (!$wsql->query('SELECT * FROM a')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as array and output them:
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_07.php b/examples/demo_07.php
new file mode 100755
index 0000000..f293cb9
--- /dev/null
+++ b/examples/demo_07.php
@@ -0,0 +1,37 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 7
+    **
+    ** Shows a complex query
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/browse/lang/php/')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query searches all links where the URL starts with /snippets and the text starts with 
+       "array_" => so all links to array functions will be returned
+    */
+    if (!$wsql->query('SELECT * FROM a WHERE preg_match("/^\/snippets/i", $href) and preg_match("/^array_/i", $text)')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as array and output them:
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_08.php b/examples/demo_08.php
new file mode 100755
index 0000000..e25a009
--- /dev/null
+++ b/examples/demo_08.php
@@ -0,0 +1,86 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 8
+    **
+    ** Shows how to parse a RSS/XML file with htmlSQL
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to the RSS URL (this URL contains new snippets from my codedump project)
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/rss/')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       select the text attribute (alias for the tag content) from the <item> tag
+    */
+    if (!$wsql->query('SELECT text FROM item')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch all results as objects:
+    foreach($wsql->fetch_objects() as $obj){
+        
+        // create a new htmlsql object:
+        $sub_wsql = new htmlsql();
+        
+        // connect to the <item> content:
+        $sub_wsql->connect('string', $obj->text);
+            
+        // fetch all attributes of all tags:
+        if (!$sub_wsql->query('SELECT * FROM *')){
+            print "Query error: " . $sub_wsql->error; // the sub-query failed, so report ITS error, not $wsql's
+            exit;
+        }
+        
+        // this "special" function converts tagnames to keys
+        $sub_wsql->convert_tagname_to_key();
+        
+        /* this function converts an array that looks like this:
+        
+            $array[0]['tagname'] = 'title';
+            $array[0]['text'] = 'example 1';
+            
+            $array[1]['tagname'] = 'link';
+            $array[1]['text'] = 'http://www.example.org/';
+            
+            $array[2]['tagname'] = 'description';
+            $array[2]['text'] = 'description bla';
+            $array[2]['fulltext'] = '1'; // additional attribute
+            
+            -> to:
+            
+            $array['title']['text'] = 'example 1';
+            
+            $array['link']['text'] = 'http://www.example.org/';
+            
+            $array['description']['text'] = 'description bla';
+            $array['description']['fulltext'] = '1'; // additional attribute
+            
+            this makes the array easier to access
+            
+        */
+        
+        
+        // fetch item as array:
+        $item = $sub_wsql->fetch_array();
+                
+        // format the extracted links as HTML links and output them:
+        print "<a href=\"" . $item['link']['text'] . "\">";
+        print $item['title']['text'] . "</a><br/>\n";
+        
+        // also available:
+        // description, pubDate
+        
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_09.php b/examples/demo_09.php
new file mode 100755
index 0000000..25fc23e
--- /dev/null
+++ b/examples/demo_09.php
@@ -0,0 +1,51 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 9
+    **
+    ** Shows how to use the "select" function
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    // restricts the search process to the content between
+    // <body> and </body>
+    // this also works with other tags like: head or html, or table
+    $wsql->select('body');
+    
+    /*
+        other examples:
+    
+        $wsql->select('div',3); <-- selects the fourth <div> (the index is zero-based)
+        
+        $wsql->select('table',0); <-- selects the first table        
+                            ^ default is also = 0
+    */
+    
+    
+    /* execute a query:
+       
+       This query returns all <h1> headers
+    */
+    if (!$wsql->query('SELECT * FROM h1')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as array
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_10.php b/examples/demo_10.php
new file mode 100755
index 0000000..961a0a8
--- /dev/null
+++ b/examples/demo_10.php
@@ -0,0 +1,55 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 10
+    **
+    ** Shows how to use the "isolate_content" function
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /*
+    ** The isolate_content function works like the select function,
+    ** but you can specify custom HTML parts, the content between
+    ** these two strings will be used for the query process
+    **
+    ** In this case we select all content between "<h1>New snippets</h1>"
+    ** and "<p id="rss">" this returns all snippet links, and no other links
+    ** (like header or navigation links)
+    */
+
+    $wsql->isolate_content('<h1>New snippets</h1>', '<p id="rss">');
+    
+    /*
+        other examples:
+    
+        $wsql->isolate_content('<body>', '</body>');
+        $wsql->isolate_content('<!--content:start-->', '<!--end-->');
+    */
+    
+    /* execute a query:
+       
+       This query returns all links:
+    */
+    if (!$wsql->query('SELECT * FROM a')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as array
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_11.php b/examples/demo_11.php
new file mode 100755
index 0000000..591598f
--- /dev/null
+++ b/examples/demo_11.php
@@ -0,0 +1,36 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 11
+    **
+    ** Shows how to query a simple XML file
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // connect to the demo XML file:
+    if (!$wsql->connect('file', 'demo_xml.xml')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+
+    /* execute a query:
+       
+       This query returns the id, name and password of all active users
+    */
+    if (!$wsql->query('SELECT id, name, password FROM user WHERE $status == "active"')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as array
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_12.php b/examples/demo_12.php
new file mode 100755
index 0000000..68d524a
--- /dev/null
+++ b/examples/demo_12.php
@@ -0,0 +1,44 @@
+<?php
+
+    /*
+    ** htmlSQL - Example 12
+    **
+    ** Shows how to replace the user agent and the referer with
+    ** custom values
+    */
+
+    include_once("../snoopy.class.php");
+    include_once("../htmlsql.class.php");
+    
+    $wsql = new htmlsql();
+    
+    // set a custom user agent:
+    $wsql->set_user_agent('MyAgentName/0.9');
+    
+    // set a new referer:
+    $wsql->set_referer('http://www.jonasjohn.de/custom/referer/');
+    
+    
+    // connect to a URL
+    if (!$wsql->connect('url', 'http://codedump.jonasjohn.de/')){
+        print 'Error while connecting: ' . $wsql->error;
+        exit;
+    }
+    
+    /* execute a query:
+       
+       This query returns all links:
+    */
+    if (!$wsql->query('SELECT * FROM a')){
+        print "Query error: " . $wsql->error; 
+        exit;
+    }
+
+    // fetch results as array
+    foreach($wsql->fetch_array() as $row){
+    
+        print_r($row);
+        
+    }
+    
+?>
+\ No newline at end of file
diff --git a/examples/demo_data.htm b/examples/demo_data.htm
new file mode 100755
index 0000000..bd5d766
--- /dev/null
+++ b/examples/demo_data.htm
@@ -0,0 +1,195 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<!-- 
+    this is a modified HTML source code from www.jonasjohn.de 
+    for htmlSQL - testing purposes only
+    
+    Copyright (c) 2004-2006 Jonas John
+-->
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+        <title>jonasjohn.de: startpage</title>
+        
+        <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+        <meta http-equiv="Content-Language" content="en" />
+        
+        <meta name="description" content="Personal WebSite of Jonas John." />
+        <meta name="keywords" content="jonas, john, photos, design, php, tests, experiments, privat, portfolio" />
+
+        <meta name="MSSmartTagsPreventParsing" content="true" />
+        <meta http-equiv="imagetoolbar" content="no" />
+
+
+    </head>
+<body>
+    
+<p class="hidden"> 
+    <a href="#content" accesskey="s">Skip to content...</a>
+</p>
+
+<div id="nav">
+    <div id="inner_nav">
+    
+        <a href="http://www.jonasjohn.de/" id="logo" accesskey="h">
+            <img src="/img/logo.png" width="137" height="30" alt="jonasjohn.de - logo" />
+        </a>
+    
+        <div id="lang">
+            <span class="hidden">Choose your language:</span>
+            <a href="/" accesskey="e" id="lang_sel"><img src="/img/l_eng.png" alt="" width="27" height="20" /> english</a> 
+            <a href="/de/" id="lang_def" accesskey="d"><img src="/img/l_de.png" alt="" width="27" height="20" /> deutsch</a>
+        </div>
+        
+        <br class="clear" />
+    
+        <div id="info_panel"></div>
+  
+    </div>
+</div>
+
+    
+<div id="header">
+    <div id="inner_header">
+            
+        <span class="hidden">Navigation:</span>
+        <ul>
+            <li><a href="/" accesskey="1">Home</a></li>
+            <li><a href="/lab/" accesskey="2">Lab</a></li>
+            <li><a href="/pictures/" accesskey="3">Photos</a></li>
+            <li><a href="/about/" accesskey="4">About me</a></li>
+            <li><a href="/sitemap.htm" accesskey="5">Sitemap</a></li>
+            <li><a href="/contact.htm" accesskey="6">Contact</a></li>
+        </ul>
+        
+        <br class="clear" />
+    </div>
+</div>
+
+<div id="page">    
+    <div id="inner_page">
+    
+        <a name="content"></a>
+    
+        <div id="h_left">
+            
+            <h1 class="big">
+                &#172; welcome to...<br/>
+                <span class="sub">the personal website of jonas john!</span>
+            </h1>
+                
+            <p>
+            
+                Hello and welcome to the personal website of <b>Jonas John</b>. This is my personal
+                web playground, I use it to present myself and to create some experimental
+                things. Have fun! 
+            
+    
+                <br/>
+                <br/>
+                <br/>
+                
+            </p>
+        </div>
+        
+        <div id="h_right">
+            <p>
+                <b>News (May 04, 2006):</b><br/>
+                I published the third version of my website. Now it's almost 
+                completely translated in English. Just a few texts left.
+                <br/>
+                <br/>
+                
+                <a href="/news.htm" id="more">News archive...</a>
+                <br/>
+            </p>
+            
+        </div>
+        
+        <br class="clear" />
+            
+        <div id="inner_content" class="clear sect_spacer">
+        
+            <div class="large_box">
+        
+                    <h2>What do I find here?</h2>
+                    
+                    <div class="halfbox bleft">
+                        <p>
+                            <a href="/lab/">
+                                <img src="img/p_code.png" alt="my lab" width="120" height="90" /><br/><span class="plink">Lab</span>
+                            </a>
+                            
+                            Look on this page to get some informations about my 
+                            <b>web projects</b> and software that I made. 
+                            
+                        </p>
+                    </div>
+                        
+                    <div class="halfbox bright">
+                        <p>
+                            <a href="/pictures/">
+                                <img src="img/p_photo.png" alt="photos" class="img_left" width="120" height="90" /><br/><span class="plink">Photos</span>
+                            </a>
+                            
+                            Here you find a few <b>photos</b> I made. I'm an amateur photographer,
+                            so don't expect too much ;-)
+                            
+                        </p>
+                    </div>
+                    
+                    <br class="clear" />
+                    <br class="clear boxspacer" />
+                    
+                    <div class="halfbox bleft">
+                        <p>
+                            <a href="/lab/adblock.htm">
+                                <img src="img/p_adblock.png" alt="adblock filterset generator" class="img_left" width="120" height="90" /><br/><span class="plink">Adblock F. Generator</span>
+                            </a>
+                            
+                            This <b>Adblock Plus Filterset Generator</b> allows you to create your own customized
+                            filterlist for the Firefox Plugin &quot;Adblock Plus&quot;. Just check or uncheck
+                            the filters you want.
+                    
+   
+                        </p>
+                    </div>
+                    
+                    <div class="halfbox bright">
+                        <p>
+                            <a href="/lab/codedump.htm">
+                                <img src="img/p_codedump.png" alt="codedump" class="img_left" width="120" height="90" /><br/><span class="plink">Codedump</span>
+                            </a>
+                            
+                            Here you can find around 70 <b>code snippets</b> for different topics.
+                            The snippet languages are PHP, JavaScript, HTML, Perl and Python.
+                            You can use them freely in your projects (public domain).
+                      
+                        </p>
+                    </div>
+                    
+                    <br class="clear" />
+                    <br/>
+            </div>
+            
+            <br/>
+            
+            <br class="clear" />
+        
+        </div>
+                
+    </div>
+
+</div>
+
+<div id="footer">
+    <p>
+        Copyright &copy; 2004-2006 Jonas David John. All rights reserved. 
+        <a href="/contact.htm">Imprint</a> 
+    </p>
+</div>
+
+</body>
+</html>
+
diff --git a/examples/demo_xml.xml b/examples/demo_xml.xml
new file mode 100755
index 0000000..e54968e
--- /dev/null
+++ b/examples/demo_xml.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<users>
+    <user id="0" name="admin" password="foobar" status="active"></user>
+    <user id="1" name="jonas" password="heyho" status="active"></user>
+    <user id="2" name="jack" password="daniels" status="drunk"></user>
+    <user id="3" name="bill" password="kill" status="dead"></user>
+    <user id="4" name="homer" password="kenny" status="active"></user>
+</users>
+\ No newline at end of file
diff --git a/examples/query_examples.txt b/examples/query_examples.txt
new file mode 100755
index 0000000..638bd6c
--- /dev/null
+++ b/examples/query_examples.txt
@@ -0,0 +1,51 @@
+
+Some query examples for copy & paste ;-)
+
+
+SELECT * FROM h1
+^ select all <h1> tags
+
+
+SELECT * FROM a
+^ select all links
+
+
+SELECT * FROM td
+^ select all <td>'s
+
+
+SELECT href as url, text FROM a
+^ return href as url and text as text from all links
+
+
+SELECT * FROM a WHERE preg_match("/^http:\/\//", $href)
+^ find all external links
+
+
+SELECT * FROM a WHERE preg_match("/^\/snippets/i", $href) and preg_match("/^array_/i", $text)
+^ find all links starting with /snippets and with a link text starting with "array_"
+
+
+SELECT * FROM *
+^ select all attributes of all tags ;-)
+
+
+SELECT id, name, password FROM user WHERE $status == "active"
+^ select all <user> tags where status="active" (for XML files)
+
+
+SELECT * FROM * WHERE $id == "header"
+^ return all tags with the $id = header
+
+
+SELECT * FROM a WHERE substr($href,0,1) != "/"
+^ select links with URLs that do not start with / (mainly external links)
+
+
+SELECT * FROM * WHERE $class == "nav_item"
+^ select all tags with the class = nav_item
+
+
+SELECT * FROM a WHERE ($href == "foo.htm" and $title == "foo") or ($title == "bar")
+^ complex query
+
diff --git a/htmlsql.class.php b/htmlsql.class.php
new file mode 100755
index 0000000..2292073
--- /dev/null
+++ b/htmlsql.class.php
@@ -0,0 +1,677 @@
+<?php
+
+/*
+htmlSQL - version 0.5
+--------------------------------------------------------------------
+htmlSQL is an experimental class to query websites or HTML code with 
+an SQL-like language.
+
+AUTHOR: Jonas John (http://www.jonasjohn.de/)
+
+The latest version of htmlSQL can be obtained from:
+http://www.jonasjohn.de/lab/htmlsql.htm
+
+LICENSE:
+--------------------------------------------------------------------
+Copyright (c) 2006 Jonas John. All rights reserved.
+--------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions 
+are met:
+
+- Redistributions of source code must retain the above copyright 
+  notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright 
+  notice, this list of conditions and the following disclaimer in 
+  the documentation and/or other materials provided with the distribution.
+- Neither the name of Jonas John nor the names of its contributors 
+  may be used to endorse or promote products derived from this 
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+    
+--------------------------------------------------------------------
+
+CHANGELOG:
+
+0.4 -> 0.5 (May 07, 2006):
+- Renamed the project from webSQL to htmlSQL, because the name
+  webSQL was already taken... :-(
+- Added some error checks and error messages 
+- Added the convert_tagname_to_key function and
+  fixed a few issues
+
+0.1 -> 0.4 (April 2006):
+- Created main parts of the class
+    
+*/
+
+class htmlsql {
+
+    // configuration:
+
+    // htmlSQL version:
+    var $version = '0.5';
+
+    // referer and user agent:
+    var $referer = '';
+    var $user_agent = 'htmlSQL/0.5';
+    
+    
+    
+    // these are filled on runtime:
+    // (don't touch them)
+    
+    // holds snoopy object:
+    var $snoopy = NULL;
+    
+    // the results array is stored in here:
+    var $results = array();
+    
+    // the results objects are stored in here:
+    var $results_objects = NULL;
+
+    // the error message gets stored in here:
+    var $error = '';
+    
+    // the downloaded page is stored in here:
+    var $page = '';
+    
+    
+    /*
+    ** init_snoopy
+    **
+    ** initializes the snoopy class
+    */
+    
+    function init_snoopy(){
+        $this->snoopy = new Snoopy(); // a fresh client replaces any previously stored one
+        $this->snoopy->agent = $this->user_agent; // default 'htmlSQL/0.5' or the set_user_agent() value
+        $this->snoopy->referer = $this->referer; // empty by default or the set_referer() value
+    }
+    
+    
+    /*
+    ** set_user_agent
+    **
+    ** set a custom user agent
+    */
+    
+    function set_user_agent($u){ 
+        $this->user_agent = $u; // only stored here; init_snoopy() copies it onto the Snoopy client
+    }
+    
+    
+    
+    /*
+    ** set_referer
+    **
+    ** sets the referer
+    */
+    
+    function set_referer($r){ 
+        $this->referer = $r; // only stored here; init_snoopy() copies it onto the Snoopy client
+    }
+    
+    
+    /*
+    ** _get_between
+    **
+    ** returns the content between $start and $end
+    */
+    
+    function _get_between($content,$start,$end){
+        // explode() fails on an empty delimiter, so treat that case as "not found":
+        if ((string)$start === '' or (string)$end === ''){ return ''; }
+        $after = explode($start, $content, 2); // limit 2: cut at the FIRST $start only
+        if (!isset($after[1])){ return ''; }   // $start does not occur at all
+        $parts = explode($end, $after[1], 2);  // a missing $end keeps everything after $start
+        return $parts[0];
+    }
+    
+    
+    /*
+    ** connect
+    **
+    ** connects to a data source (url, file or string)
+    */
+    
+    function connect($type, $resource){ // true = $this->page is filled; false = see $this->error
+        if ($type == 'url'){ return $this->_fetch_url($resource); } // download through Snoopy
+        if ($type == 'string'){ $this->page = $resource; return true; } // markup passed in directly
+        if ($type != 'file'){
+            $this->error = 'Unknown connection type "'.$type.'", please use "url", "file" or "string".';
+            return false;
+        }
+        if (!file_exists($resource)){ 
+            $this->error = 'The given file "'.$resource.'" does not exist!';
+            return false;
+        }
+        $data = file_get_contents($resource); // false when the path exists but cannot be read as a file
+        if ($data === false){ $this->error = 'The given file "'.$resource.'" could not be read!'; return false; }
+        $this->page = $data; return true;
+    }
+    
+    
+    /*
+    ** _fetch_url
+    **
+    ** downloads the given URL with snoopy
+    */
+    
+    function _fetch_url($url){
+        // Downloads $url into $this->page. Returns true on success; on failure
+        // stores a message in $this->error and returns false.
+        $parsed_url = parse_url($url);
+        
+        // URL schemes are case-insensitive, so "HTTP://..." must be accepted too:
+        if (!isset($parsed_url['scheme']) or strtolower($parsed_url['scheme']) != 'http'){ 
+            $this->error = 'Unsupported URL scheme given, please just use "HTTP".';
+            return false;
+        }
+        if (!isset($parsed_url['host']) or $parsed_url['host'] == ''){ 
+            $this->error = 'Invalid URL given!';
+            return false;
+        }
+        
+        // rebuild the URL from host, port, path and query only; any other part
+        // of the given URL (user:pass@, #fragment) is dropped:
+        $host = $parsed_url['host'];
+        $host .= (!empty($parsed_url['port'])) ? ':'.$parsed_url['port'] : '';
+        $path = (!empty($parsed_url['path'])) ? $parsed_url['path'] : '/';
+        $path .= (!empty($parsed_url['query'])) ? '?'.$parsed_url['query'] : '';
+        $url = 'http://' . $host . $path;
+        
+        $this->init_snoopy();
+        
+        if (!$this->snoopy->fetch($url)){
+            $this->error = 'Could not establish a connection to the given URL!';
+            return false;
+        }
+        
+        // take over the downloaded document, then empty Snoopy's buffer:
+        $this->page = $this->snoopy->results;
+        $this->snoopy->results = '';
+        return true;
+    }
+    
+    
+    /*
+    ** _extract_all_tags
+    **
+    ** collects the name, attribute string and inner content of every tag found in $html
+    */
+    
+    function _extract_all_tags($html, &$tag_names, &$tag_attributes, &$tag_values, $depth=0){
+        
+        // recursion guard:
+        if ($depth > 99999){ return; }
+        
+        // $found[1] = tag names, $found[2] = raw attribute strings,
+        // $found[4] = inner content (empty when no closing tag was matched)
+        preg_match_all('/<([a-z0-9\-]+)(.*?)>((.*?)<\/\1>)?/is', $html, $found);
+        
+        for ($i=0; $i < count($found[0]); $i++){
+            $inner = $found[4][$i];
+            
+            $tag_names[] = trim($found[1][$i]);
+            $tag_attributes[] = trim($found[2][$i]);
+            $tag_values[] = trim($inner);
+            
+            // descend only when the inner content itself contains markup:
+            if (trim($inner) == '' or !preg_match('/<[a-z0-9\-]+.*?>/is', $inner)){ continue; }
+            $this->_extract_all_tags($inner, $tag_names, $tag_attributes, $tag_values, $depth+1);
+        }
+        
+    }
+    
+    
+    /*
+    ** isolate_content
+    **
+    ** isolates the content to a specific part
+    */
+    
+    function isolate_content($start,$end){
+
+        // replace the page buffer with whatever _get_between() returns
+        // for the two markers
+        $isolated = $this->_get_between($this->page, $start, $end);
+        $this->page = $isolated;
+
+    }
+    
+
+    /*
+    ** select
+    **
+    ** restricts the content of a specific tag
+    */
+    
+    function select($tagname, $num=0){
+
+        // Restricts $this->page to the inner HTML of the ($num+1)-th
+        // <$tagname>...</$tagname> element ($num is zero-based).
+        // An empty $tagname leaves the page untouched and returns true.
+        // Returns true on success; otherwise sets $this->error and
+        // returns false.
+
+        if ($tagname == ''){ return true; }
+
+        // The tag name must be followed by whitespace or ">" so that "a"
+        // does not match "<abbr>". preg_match_all() is required because a
+        // single preg_match() only ever exposes the first occurrence,
+        // which made every $num > 0 fail.
+        preg_match_all('/<'.$tagname.'(?:\s.*?)?>(.*?)<\/'.$tagname.'>/is', $this->page, $m);
+
+        // $m[1] lists the inner HTML of all occurrences in document order
+        if (isset($m[1][$num]) and !empty($m[1][$num])){
+            $this->page = $m[1][$num];
+            return true;
+        }
+
+        // the message reports the 1-based position, as before
+        $this->error = 'Could not select tag: "'.$tagname.'('.($num+1).')"!';
+        return false;
+    }
+    
+    
+    /*
+    ** get_content
+    **
+    ** returns the content of an request
+    */
+    
+    function get_content(){ 
+        // hands back the current page buffer ($this->page) unchanged
+        return $this->page;
+    }
+    
+    
+    /*
+    ** _clean_array
+    **
+    ** 
+    */
+    
+    function _clean_array($arr){
+
+        // Trims every entry of the zero-based list $arr and returns a new
+        // list that contains only the entries that are not empty afterwards.
+
+        $kept = array();
+        $total = count($arr);
+
+        for ($i = 0; $i < $total; $i++){
+            $entry = trim($arr[$i]);
+            if ($entry == ''){ continue; }
+            $kept[] = $entry;
+        }
+
+        return $kept;
+    }
+    
+    
+     /*
+    ** _test_tag
+    **
+    ** 
+    */
+    
+    function _test_tag($tag_attributes, $if_term){
+
+        // Evaluates the WHERE expression $if_term for one tag.
+        //
+        // $tag_attributes  map of attribute name => value of the tag
+        // $if_term         PHP expression in which "$name" refers to the
+        //                  attribute "name"
+        //
+        // Returns the value of the expression. If the expression cannot be
+        // parsed, $this->error is set and false is returned.
+        //
+        // SECURITY: $if_term is executed with eval(); it must never contain
+        // unfiltered user input.
+
+        // every variable referenced in the term exists and defaults to ''
+        $__vars = array();
+        if (preg_match_all('/\$([a-z0-9_\-]+)/i', $if_term, $__m)){
+            foreach ($__m[1] as $__name){ $__vars[$__name] = ''; }
+        }
+
+        // attribute values override the defaults
+        foreach ($tag_attributes as $__key => $__value){
+            $__key = preg_replace('/[^a-z0-9_\-]/i', '', $__key);
+            if ($__key != ''){ $__vars[$__key] = $__value; }
+        }
+        unset($tag_attributes, $__m, $__name, $__key, $__value, $__vars['this']);
+
+        // EXTR_SKIP: attribute names come from the parsed (untrusted) HTML.
+        // They must not be able to replace $if_term or any other local of
+        // this method, otherwise the page itself could choose the code
+        // that is handed to eval().
+        extract($__vars, EXTR_SKIP);
+
+        $r = false;
+        try {
+            // PHP 5 signals a parse error by returning false ...
+            if (@eval('$r = ('.$if_term.');') === false){
+                $this->error = 'The WHERE statement is invalid (eval() failed)!';
+                return false;
+            }
+        }
+        catch (ParseError $e){
+            // ... PHP 7+ throws a ParseError instead
+            $this->error = 'The WHERE statement is invalid (eval() failed)!';
+            return false;
+        }
+
+        return $r;
+
+    }
+    
+    
+    /*
+    ** _match_tags
+    **
+    ** 
+    */
+    
+    function _match_tags(&$results, &$return_values, &$where_term, &$tag_attributes, &$tag_values, &$tag_names){
+
+        // Turns every raw attribute string in $tag_attributes into an
+        // attribute map (via parse_attributes()), adds the pseudo attributes
+        // "tagname" and "text", and hands each tag that passes the WHERE
+        // term to _add_result(). An empty WHERE term accepts every tag.
+        //
+        // $tag_names is either a list parallel to $tag_attributes or one
+        // tag name (string) that applies to all entries.
+
+        $where_term = trim($where_term);
+
+        $has_filter = ($where_term != '');
+        $names_are_list = is_array($tag_names);
+        $total = count($tag_attributes);
+
+        for ($i = 0; $i < $total; $i++){
+
+            $row = $this->parse_attributes($tag_attributes[$i]);
+
+            if ($names_are_list){
+                $row['tagname'] = isset($tag_names[$i]) ? $tag_names[$i] : '';
+            }
+            else {
+                $row['tagname'] = $tag_names; // string
+            }
+
+            $row['text'] = isset($tag_values[$i]) ? $tag_values[$i] : '';
+
+            // the parsed form is written back for the caller
+            $tag_attributes[$i] = $row;
+
+            if (!$has_filter or $this->_test_tag($row, $where_term)){
+                $this->_add_result($results, $return_values, $row);
+            }
+        }
+    }
+    
+    
+    /*
+    ** query
+    **
+    ** performs a query
+    */
+    
+    function query($term){
+
+        // Runs one htmlSQL query against $this->page:
+        //
+        //   SELECT <*|attr[,attr as alias ...]> FROM <*|tagname> [WHERE <expression>]
+        //
+        // The matching rows are stored in $this->results.
+        // Returns true on success; otherwise sets $this->error and
+        // returns false.
+        //
+        // The WHERE expression is evaluated with eval() in _test_tag();
+        // never put unfiltered user input into it.
+
+        // query results are stored in here:
+        $results = array();
+        $this->results = NULL;
+        $this->results_objects = NULL;
+
+        // forget the error of an earlier call, otherwise the check at the
+        // end would report a stale failure for a valid query
+        $this->error = '';
+
+        $term = trim($term);
+        if ($term == ''){
+            $this->error = 'Empty query given!';
+            return false;
+        }
+
+        // match query; any whitespace (including line breaks) may separate
+        // the keywords. A term that is no SELECT ... FROM ... statement is
+        // rejected instead of being run as "SELECT * FROM <nothing>".
+        if (!preg_match('/^SELECT\s+(.*?)\s+FROM\s+(.*)$/is', $term, $m)){
+            $this->error = 'Invalid query given!';
+            return false;
+        }
+
+        // parse returns values
+        // SELECT * FROM ...
+        // SELECT foo,bar FROM ...
+        $return_values = trim($m[1]);
+        if ($return_values != '*'){
+            $return_values = explode(',', strtolower($return_values));
+            $return_values = $this->_clean_array($return_values);
+        }
+
+        // match from and where part:
+        //
+        // ... FROM * WHERE $id=="one"
+        // ... FROM a WHERE $class=="red"
+        // ... FROM a
+        // ... FROM *
+        $last = trim($m[2]);
+
+        $search_term = $last;
+        $where_term = '';
+
+        if (preg_match('/^(.*?)\s+WHERE\s+(.*)$/is', $last, $m)){
+            $search_term = trim($m[1]);
+            $where_term = trim($m[2]);
+        }
+
+        if ($search_term == ''){
+            $this->error = 'Invalid query given!';
+            return false;
+        }
+
+        /*
+        ** find tags:
+        */
+
+        if ($search_term == '*'){
+            // search all
+
+            $tag_names = array();
+            $tag_attributes = array();
+            $tag_values = array();
+
+            $html = $this->page;
+
+            $this->_extract_all_tags($html, $tag_names, $tag_attributes, $tag_values);
+
+            $this->_match_tags($results, $return_values, $where_term, $tag_attributes, $tag_values, $tag_names);
+
+        }
+        else {
+
+            // search term is a tag
+
+            $tagname = trim($search_term);
+
+            $tag_attributes = array();
+            $tag_values = array();
+
+            // group 1: raw attribute string, group 3: inner HTML (missing
+            // for tags without a closing tag)
+            $regexp = '<'.$tagname.'([ \t].*?|)>((.*?)<\/'.$tagname.'>)?';
+            preg_match_all('/'.$regexp.'/is', $this->page, $m);
+
+            if (count($m[0]) != 0){
+                $tag_attributes = $m[1];
+                $tag_values = $m[3];
+            }
+
+            $this->_match_tags($results, $return_values, $where_term, $tag_attributes, $tag_values, $tagname);
+        }
+
+        $this->results = $results;
+
+        // was there a error during the search process?
+        if ($this->error != ''){
+            return false;
+        }
+
+        return true;
+
+    }
+    
+    /*
+    ** convert_tagname_to_key
+    **
+    ** converts the tagname to the array key
+    */
+    
+    function convert_tagname_to_key(){
+
+        // Re-keys $this->results by the "tagname" entry of each row and
+        // removes that entry from the row. Rows without a tag name are
+        // stored under "(empty)". Rows that share a tag name overwrite each
+        // other; the last one wins.
+
+        $new_array = array();
+
+        // foreach always starts at the first row. The former each() loop
+        // depended on the array's internal pointer, and each() no longer
+        // exists in PHP 8.
+        if (is_array($this->results)){
+            foreach ($this->results as $val){
+
+                if (isset($val['tagname'])){
+                    $tag_name = $val['tagname'];
+                    unset($val['tagname']);
+                }
+                else { $tag_name = '(empty)'; }
+
+                $new_array[$tag_name] = $val;
+
+            }
+        }
+
+        $this->results = $new_array;
+
+        // objects cached by fetch_objects() were built from the old layout
+        $this->results_objects = NULL;
+    }
+    
+    
+    /*
+    ** fetch_array
+    **
+    ** returns the results as an array
+    */
+    
+    function fetch_array(){
+        // $this->results is set to NULL at the start of query() and stays
+        // NULL when query() returns early, so this is not always an array
+        return $this->results;
+    }
+    
+    
+    /*
+    ** _array2object
+    **
+    ** converts an array to an object
+    */
+    
+    function _array2object($array) {
+
+        // Copies every key/value pair of an array onto a new StdClass
+        // object; anything that is not an array is returned untouched.
+
+        if (!is_array($array)) {
+            return $array;
+        }
+
+        $object = new StdClass();
+
+        foreach ($array as $name => $value){
+            $object->$name = $value;
+        }
+
+        return $object;
+    }
+    
+    
+    /*
+    ** fetch_objects
+    **
+    ** returns the results as objects
+    */
+    
+    function fetch_objects(){
+
+        // Returns the result rows as objects (see _array2object()), keeping
+        // the keys of $this->results. The converted list is cached in
+        // $this->results_objects. Without array results an empty array is
+        // returned.
+
+        if ($this->results_objects == NULL){
+
+            $objects = array();
+
+            // foreach replaces reset()/each(): each() no longer exists in
+            // PHP 8 and neither function accepts the NULL that
+            // $this->results holds before a successful query
+            if (is_array($this->results)){
+                foreach ($this->results as $key => $val){
+                    $objects[$key] = $this->_array2object($val);
+                }
+            }
+
+            $this->results_objects = $objects;
+        }
+
+        return $this->results_objects;
+    }
+    
+    /*
+    ** get_result_count
+    **
+    ** returns the number of results
+    */
+    
+    function get_result_count(){
+        // $this->results is NULL until a query has produced results;
+        // count(NULL) raises a TypeError on PHP 8, so report 0 instead
+        return is_array($this->results) ? count($this->results) : 0;
+    }
+    
+    
+    /*
+    ** _add_result
+    **
+    ** 
+    */
+    
+    function _add_result(&$results, $return_values, $tag_attributes){
+
+        // Appends one row to $results.
+        //
+        // $return_values == '*'   the complete attribute map is appended
+        // $return_values (list)   only the listed attributes are appended;
+        //                         "name as alias" stores attribute "name"
+        //                         under the key "alias", missing attributes
+        //                         become ''
+        // anything else           nothing is appended
+
+        if ($return_values == '*'){
+            $results[] = $tag_attributes;
+            return;
+        }
+
+        if (!is_array($return_values)){
+            return;
+        }
+
+        $row = array();
+        $total = count($return_values);
+
+        for ($i = 0; $i < $total; $i++){
+
+            $column = $return_values[$i];
+            $parts = explode(' as ', $column);
+
+            if (count($parts) != 1){
+                $source = trim($parts[0]);
+                $caption = trim($parts[1]);
+            }
+            else {
+                $source = $column;
+                $caption = $column;
+            }
+
+            $row[$caption] = isset($tag_attributes[$source]) ? $tag_attributes[$source] : '';
+        }
+
+        $results[] = $row;
+    }
+    
+    
+    /*
+    ** parse_attributes
+    **
+    ** parses HTML attributes and returns an array
+    */
+    
+    function parse_attributes($attrib){
+        
+        // Parses the raw attribute part of a tag (e.g. ' href="/a" class=x')
+        // character by character and returns a map of lower-cased attribute
+        // name => value. Attributes without a value map to ''.
+        //
+        // Modes of the scanner:
+        //   search_key           collecting an attribute name
+        //   value                "=" was seen, the next character decides the quoting
+        //   find_value_ending_a  inside a double-quoted value
+        //   find_value_ending_b  inside a single-quoted value
+        //   find_value_ending_c  inside an unquoted value
+        
+        // sentinel: the trailing ">" flushes a final name or unquoted value
+        $attrib .= '>';
+        
+        $mode = 'search_key';
+        $tmp = ''; 
+        $current_key = '';
+        
+        $attributes = array();
+        
+        for ($x=0; $x < strlen($attrib); $x++){
+        
+            $char = $attrib[$x];
+            
+            // "=" ends the attribute name
+            if ($char == '=' and $mode == 'search_key'){
+                $current_key = trim($tmp);
+                $tmp = '';
+                $mode = 'value';
+            }
+            // whitespace or ">" while reading a name: attribute without value
+            else if ($mode == 'search_key' and preg_match('/[ \t\s\r\n>]/', $char)){ 
+                $current_key = strtolower(trim($tmp));
+                if ($current_key != ''){ $attributes[$current_key] = ''; }
+                $tmp = ''; $current_key = '';
+            }
+            // first character after "=": opening quote, or start of an unquoted value
+            else if ($mode == 'value' and $char == '"'){ $mode = 'find_value_ending_a'; }
+            else if ($mode == 'value' and $char == '\''){ $mode = 'find_value_ending_b'; }
+            else if ($mode == 'value'){ $tmp .= $char; $mode = 'find_value_ending_c'; }
+            // end of the value: matching quote, or whitespace/">" for unquoted values
+            else if (
+                ($mode == 'find_value_ending_a' and $char == '"') or 
+                ($mode == 'find_value_ending_b' and $char == '\'') or 
+                ($mode == 'find_value_ending_c' and preg_match('/[ \t\s\r\n>]/', $char))
+            ){ 
+                
+                $mode = 'search_key';
+                
+                if ($current_key != ''){
+                    $current_key = strtolower($current_key);
+                    $attributes[$current_key] = $tmp;
+                }
+                $tmp = '';
+            }
+            // any other character belongs to the name or value being collected
+            else { $tmp .= $char; }                
+        }
+        
+        // input ended inside a value (e.g. missing closing quote): keep what
+        // was collected, minus the trailing ">" sentinel
+        if ($mode != 'search_key' and $current_key != ''){ 
+            $current_key = strtolower($current_key);
+            $attributes[$current_key] = trim(preg_replace('/>+$/', '', $tmp));
+        }
+    
+        return $attributes;
+    
+    }
+
+}
+    
+?>
+\ No newline at end of file
diff --git a/readme.txt b/readme.txt
new file mode 100755
index 0000000..742a254
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,67 @@
+htmlSQL - Version 0.5 - README
+---------------------------------------------------------------------
+AUTHOR: Jonas John (http://www.jonasjohn.de/)
+
+
+DESCRIPTION:
+---------------------------------------------------------------------
+htmlSQL is an experimental PHP class which allows you to access HTML
+values using an SQL-like syntax. This means that you don't have to write
+complex functions (regular expressions) to extract specific values.
+The htmlSQL queries look like this:
+
+SELECT href,title FROM a WHERE $class == "list"
+       ^ Attributes    ^       ^ search query (can be empty)
+         to return     ^ 
+                       ^ HTML tag to search in 
+                         "*" is possible = all tags
+                               
+This query returns an array with all links that contain
+the attribute class="list".
+
+All web transfers in htmlSQL are using the awesome Snoopy class 
+(package version 1.2.3 - URL: http://snoopy.sourceforge.net/)
+Snoopy is not required for file or string queries, however. You will find
+all Snoopy-related documents (copyright, readme, etc.) in the snoopy_data/
+folder.
+
+
+HOW TO USE:
+---------------------------------------------------------------------
+Just include the "snoopy.class.php" and the "htmlsql.class.php" files 
+into your PHP scripts and look at the examples (examples/) to get an
+idea of how to use the htmlSQL class. It should be very simple :-)
+
+
+BACKGROUND / IDEA:
+---------------------------------------------------------------------
+I had this idea while extracting some data from a website. When I realized
+that the algorithms and functions used to extract links and other tags are
+often the same, I decided to combine all of them into one universally
+usable class. While drinking a coffee and thinking about that problem, I
+thought it would be cool to access HTML elements by using SQL. So I
+started creating this class...
+
+
+WARNING:
+---------------------------------------------------------------------
+The eval() function is used for the WHERE statement. Make sure that all 
+user data is checked and filtered against malicious PHP code. 
+Never trust user input! 
+
+
+TODO:
+---------------------------------------------------------------------
+- enhance the HTML parser
+- test htmlSQL with invalid and bad HTML files
+- replace the ugly eval() approach for the WHERE statement
+  with a custom expression evaluator
+- more error checks
+- include the LIMIT function/method like in SQL
+
+
+LICENSE:
+---------------------------------------------------------------------
+htmlSQL uses a modified BSD license; you will find the full license text
+in "htmlsql.class.php".
+
diff --git a/readme_german.txt b/readme_german.txt
new file mode 100755
index 0000000..ffecc5a
--- /dev/null
+++ b/readme_german.txt
@@ -0,0 +1,85 @@
+htmlSQL - Version 0.5 - README
+---------------------------------------------------------------------
+AUTHOR: Jonas John (http://www.jonasjohn.de/)
+
+BESCHREIBUNG:
+---------------------------------------------------------------------
+htmlSQL ist eine experimentelle PHP Klasse mit der man auf HTML
+Elemente über eine SQL ähnliche Syntax zugreifen kann. Das
+bedeutet das man nicht mehr über komplizierte Funktionen
+bestimmte Tags extrahieren muss, sondern einfach eine Query
+wie diese ausführt:
+
+SELECT href,title   FROM  a   WHERE $class == "liste"
+       ^ HTML Attrib.     ^         ^ Abfrage (kann auch leer sein)
+         die zurück-      ^
+         gegeben          ^ HTML Tags die durchsucht werden sollen 
+         werden sollen      "*" ist hier möglich = alle Tags
+                              
+Diese Abfrage gibt einen Array aller Links mit dem Attribut class="liste"
+zurück.
+
+Alle HTTP Verbindungen in htmlSQL benützen die wunderbare Snoopy Klasse
+(Package Version 1.2.3 - URL: http://snoopy.sourceforge.net/). 
+Allerdings wird Snoopy nicht für "file" oder "string" Queries benötigt.
+Alle Snoopy betreffenden Dokumente (z.B: Copyright-Infos, Readme, usw.)
+befinden sich im "snoopy_data/" Unterordner.
+
+
+INSTALLATION / ANWENDUNG:
+---------------------------------------------------------------------
+Um htmlSQL in eigenen Projekten zu benützen ist es nur notwendig die
+zwei Dateien "snoopy.class.php" und die "htmlsql.class.php" zu laden
+(mit include oder z.B. require). Danach kann htmlSQL, wie in den 
+Beispielen (siehe examples/-Ordner), angesprochen werden. Dies sollte
+nicht allzu schwer sein :-)
+
+
+HINTERGRUND / GESCHICHTE:
+---------------------------------------------------------------------
+Ich hatte die Idee zu dieser Klasse als ich Daten von einer Web-Seite
+extrahiert habe und dabei merkte das sich die Funktionen und Quelltexte
+oftmals wiederholen. Da kam mir die Idee das ganze zu vereinfachen und
+eine universelle Klasse dafür zu entwickeln. 
+
+
+WARNUNG:
+---------------------------------------------------------------------
+Für die Abfragen wird die eval()-Funktion benützt. Deshalb sollten alle
+vom Besucher abhängige Daten wie z.b. IDs geprüft oder ggf. gefiltert 
+werden da es ansonsten möglich wäre schadhaften PHP Quelltext auszuführen.
+Vertraue niemals Benutzereingaben!
+
+
+TODO:
+---------------------------------------------------------------------
+- den internen HTML Parser verbessern
+- ein eigenes Query system entwickeln und nicht
+  das PHP eigene nutzen ( Die eval()-Lösung gefällt mir nicht wirklich)
+- Mehr Fehlerprüfungen
+- LIMIT Funktion einbauen
+
+
+ANWENDUNGSGEBIETE VON HTMLSQL:
+---------------------------------------------------------------------
+- Daten von anderen Web-Seiten auslesen
+- HTML basierte Datenbanken?
+- XML Daten auslesen
+
+
+LIZENZ:
+---------------------------------------------------------------------
+htmlSQL benützt eine modifizierte BSD Lizenz, welche ziemlich offen ist.
+Der Lizenztext befindet sich in der "htmlsql.class.php". 
+Kurz zusammengefasst besagt er folgendes: 
+
+- Die htmlSQL Klasse kann frei in kommerziellen und nicht-kommerziellen 
+  Projekten benützt werden
+- Die Klasse darf mit oder ohne Änderungen frei weitergegeben werden
+- Der Copyright-Hinweis darf nicht entfernt werden
+- Der Autor übernimmt keine Haftung für eventuelle Schäden
+- Der Name des Autors oder anderen beteiligten Autoren darf nur mit
+  schriftlicher Genehmigung benützt werden um für Produkte, welche 
+  htmlSQL benützen, zu werben
+
+
diff --git a/snoopy.class.php b/snoopy.class.php
new file mode 100755
index 0000000..77e5b73
--- /dev/null
+++ b/snoopy.class.php
@@ -0,0 +1,1257 @@
+<?php
+
+/*************************************************
+
+Snoopy - the PHP net client
+Author: Monte Ohrt <monte@ispi.net>
+Copyright (c): 1999-2000 ispi, all rights reserved
+Version: 1.01
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+You may contact the author of Snoopy by e-mail at:
+monte@ispi.net
+
+Or, write to:
+Monte Ohrt
+CTO, ispi
+237 S. 70th suite 220
+Lincoln, NE 68510
+
+The latest version of Snoopy can be obtained from:
+http://snoopy.sourceforge.net/
+
+*************************************************/
+
+class Snoopy
+{
+	/**** Public variables ****/
+	
+	/* user definable vars */
+
+	var $host			=	"www.php.net";		// host name we are connecting to
+	var $port			=	80;					// port we are connecting to
+	var $proxy_host		=	"";					// proxy host to use
+	var $proxy_port		=	"";					// proxy port to use
+	var $proxy_user		=	"";					// proxy user to use
+	var $proxy_pass		=	"";					// proxy password to use
+	
+	var $agent			=	"Snoopy v1.2.3";	// agent we masquerade as
+	var	$referer		=	"";					// referer info to pass
+	var $cookies		=	array();			// array of cookies to pass
+												// $cookies["username"]="joe";
+	var	$rawheaders		=	array();			// array of raw headers to send
+												// $rawheaders["Content-type"]="text/html";
+
+	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
+	var $lastredirectaddr	=	"";				// contains address of last redirected address
+	var	$offsiteok		=	true;				// allows redirection off-site
+	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
+	var $expandlinks	=	true;				// expand links to fully qualified URLs.
+												// this only applies to fetchlinks()
+												// submitlinks(), and submittext()
+	var $passcookies	=	true;				// pass set cookies back through redirects
+												// NOTE: this currently does not respect
+												// dates, domains or paths.
+	
+	var	$user			=	"";					// user for http authentication
+	var	$pass			=	"";					// password for http authentication
+	
+	// http accept types
+	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
+	
+	var $results		=	"";					// where the content is put
+		
+	var $error			=	"";					// error messages sent here
+	var	$response_code	=	"";					// response code returned from server
+	var	$headers		=	array();			// headers returned from server sent here
+	var	$maxlength		=	500000;				// max return data length (body)
+	var $read_timeout	=	0;					// timeout on read operations, in seconds
+												// supported only since PHP 4 Beta 4
+												// set to 0 to disallow timeouts
+	var $timed_out		=	false;				// if a read operation timed out
+	var	$status			=	0;					// http request status
+
+	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
+												// has permission to write to.
+												// under Windows, this should be C:\temp
+
+	var	$curl_path		=	"/usr/local/bin/curl";
+												// Snoopy will use cURL for fetching
+												// SSL content if a full system path to
+												// the cURL binary is supplied here.
+												// set to false if you do not have
+												// cURL installed. See http://curl.haxx.se
+												// for details on installing cURL.
+												// Snoopy does *not* use the cURL
+												// library functions built into php,
+												// as these functions are not stable
+												// as of this Snoopy release.
+	
+	/**** Private variables ****/	
+	
+	var	$_maxlinelen	=	4096;				// max line length (headers)
+	
+	var $_httpmethod	=	"GET";				// default http request method
+	var $_httpversion	=	"HTTP/1.0";			// default http request version
+	var $_submit_method	=	"POST";				// default submit method
+	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
+	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
+	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
+	var $_redirectdepth	=	0;					// increments on an http redirect
+	var $_frameurls		= 	array();			// frame src urls
+	var $_framedepth	=	0;					// increments on frame depth
+	
+	var $_isproxy		=	false;				// set if using a proxy server
+	var $_fp_timeout	=	30;					// timeout for socket connection
+
+/*======================================================================*\
+	Function:	fetch
+	Purpose:	fetch the contents of a web page
+				(and possibly other protocols in the
+				future like ftp, nntp, gopher, etc.)
+	Input:		$URI	the location of the page to fetch
+	Output:		$this->results	the output text from the fetch
+\*======================================================================*/
+
+	function fetch($URI)
+	{
+		// Fetches $URI with the configured request method and follows
+		// redirects (up to $this->maxredirs) and frames (up to
+		// $this->maxframes). "http" is requested over a socket; "https" is
+		// only attempted when $this->curl_path is set (and executable,
+		// where that can be checked).
+		// Returns false when the protocol is unsupported, the connection
+		// fails or the https prerequisites are missing; true otherwise.
+	
+		//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
+		$URI_PARTS = parse_url($URI);
+		// parse_url() returns false for seriously malformed URLs
+		if (!is_array($URI_PARTS))
+			$URI_PARTS = array();
+		if (!empty($URI_PARTS["user"]))
+			$this->user = $URI_PARTS["user"];
+		if (!empty($URI_PARTS["pass"]))
+			$this->pass = $URI_PARTS["pass"];
+		if (empty($URI_PARTS["query"]))
+			$URI_PARTS["query"] = '';
+		if (empty($URI_PARTS["path"]))
+			$URI_PARTS["path"] = '';
+		// a URL without scheme ends in the "invalid protocol" branch
+		// instead of reading an undefined index
+		if (empty($URI_PARTS["scheme"]))
+			$URI_PARTS["scheme"] = '';
+				
+		switch(strtolower($URI_PARTS["scheme"]))
+		{
+			case "http":
+				$this->host = $URI_PARTS["host"];
+				if(!empty($URI_PARTS["port"]))
+					$this->port = $URI_PARTS["port"];
+				if($this->_connect($fp))
+				{
+					if($this->_isproxy)
+					{
+						// using proxy, send entire URI
+						$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
+					}
+					else
+					{
+						$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
+						// no proxy, send only the path
+						$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
+					}
+					
+					$this->_disconnect($fp);
+
+					if($this->_redirectaddr)
+					{
+						/* url was redirected, check if we've hit the max depth */
+						if($this->maxredirs > $this->_redirectdepth)
+						{
+							// only follow redirect if it's on this site, or offsiteok is true
+							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
+							{
+								/* follow the redirect */
+								$this->_redirectdepth++;
+								$this->lastredirectaddr=$this->_redirectaddr;
+								$this->fetch($this->_redirectaddr);
+							}
+						}
+					}
+
+					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
+					{
+						$frameurls = $this->_frameurls;
+						$this->_frameurls = array();
+						
+						// foreach instead of each(), which was removed in PHP 8
+						foreach($frameurls as $frameurl)
+						{
+							if($this->_framedepth < $this->maxframes)
+							{
+								$this->fetch($frameurl);
+								$this->_framedepth++;
+							}
+							else
+								break;
+						}
+					}					
+				}
+				else
+				{
+					return false;
+				}
+				return true;					
+				break;
+			case "https":
+				if(!$this->curl_path)
+					return false;
+				if(function_exists("is_executable"))
+				    if (!is_executable($this->curl_path))
+				        return false;
+				$this->host = $URI_PARTS["host"];
+				if(!empty($URI_PARTS["port"]))
+					$this->port = $URI_PARTS["port"];
+				if($this->_isproxy)
+				{
+					// using proxy, send entire URI
+					$this->_httpsrequest($URI,$URI,$this->_httpmethod);
+				}
+				else
+				{
+					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
+					// no proxy, send only the path
+					$this->_httpsrequest($path, $URI, $this->_httpmethod);
+				}
+
+				if($this->_redirectaddr)
+				{
+					/* url was redirected, check if we've hit the max depth */
+					if($this->maxredirs > $this->_redirectdepth)
+					{
+						// only follow redirect if it's on this site, or offsiteok is true
+						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
+						{
+							/* follow the redirect */
+							$this->_redirectdepth++;
+							$this->lastredirectaddr=$this->_redirectaddr;
+							$this->fetch($this->_redirectaddr);
+						}
+					}
+				}
+
+				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
+				{
+					$frameurls = $this->_frameurls;
+					$this->_frameurls = array();
+
+					// foreach instead of each(), which was removed in PHP 8
+					foreach($frameurls as $frameurl)
+					{
+						if($this->_framedepth < $this->maxframes)
+						{
+							$this->fetch($frameurl);
+							$this->_framedepth++;
+						}
+						else
+							break;
+					}
+				}					
+				return true;					
+				break;
+			default:
+				// not a valid protocol
+				// ("\n" must be double-quoted to become a real line break)
+				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
+				return false;
+				break;
+		}		
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	submit
+	Purpose:	submit an http form
+	Input:		$URI	the location to post the data
+				$formvars	the formvars to use.
+					format: $formvars["var"] = "val";
+				$formfiles  an array of files to submit
+					format: $formfiles["var"] = "/dir/filename.ext";
+	Output:		$this->results	the text output from the post
+\*======================================================================*/
+
+	function submit($URI, $formvars="", $formfiles="")
+	{
+		// Submits $formvars / $formfiles to $URI with $this->_submit_method
+		// and follows redirects and frames like fetch(). A redirect target
+		// that contains "?" is requested with fetch(); any other target is
+		// submitted again with the same form data.
+		// Returns false when the protocol is unsupported, the connection
+		// fails or the https prerequisites are missing; true otherwise.
+		
+		$postdata = $this->_prepare_post_body($formvars, $formfiles);
+			
+		$URI_PARTS = parse_url($URI);
+		// parse_url() returns false for seriously malformed URLs
+		if (!is_array($URI_PARTS))
+			$URI_PARTS = array();
+		if (!empty($URI_PARTS["user"]))
+			$this->user = $URI_PARTS["user"];
+		if (!empty($URI_PARTS["pass"]))
+			$this->pass = $URI_PARTS["pass"];
+		if (empty($URI_PARTS["query"]))
+			$URI_PARTS["query"] = '';
+		if (empty($URI_PARTS["path"]))
+			$URI_PARTS["path"] = '';
+		// a URL without scheme ends in the "invalid protocol" branch
+		// instead of reading an undefined index
+		if (empty($URI_PARTS["scheme"]))
+			$URI_PARTS["scheme"] = '';
+
+		switch(strtolower($URI_PARTS["scheme"]))
+		{
+			case "http":
+				$this->host = $URI_PARTS["host"];
+				if(!empty($URI_PARTS["port"]))
+					$this->port = $URI_PARTS["port"];
+				if($this->_connect($fp))
+				{
+					if($this->_isproxy)
+					{
+						// using proxy, send entire URI
+						$this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
+					}
+					else
+					{
+						$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
+						// no proxy, send only the path
+						$this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
+					}
+					
+					$this->_disconnect($fp);
+
+					if($this->_redirectaddr)
+					{
+						/* url was redirected, check if we've hit the max depth */
+						if($this->maxredirs > $this->_redirectdepth)
+						{						
+							if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
+								$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);						
+							
+							// only follow redirect if it's on this site, or offsiteok is true
+							if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
+							{
+								/* follow the redirect */
+								$this->_redirectdepth++;
+								$this->lastredirectaddr=$this->_redirectaddr;
+								if( strpos( $this->_redirectaddr, "?" ) > 0 )
+									$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
+								else
+									$this->submit($this->_redirectaddr,$formvars, $formfiles);
+							}
+						}
+					}
+
+					if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
+					{
+						$frameurls = $this->_frameurls;
+						$this->_frameurls = array();
+						
+						// foreach instead of each(), which was removed in PHP 8
+						foreach($frameurls as $frameurl)
+						{														
+							if($this->_framedepth < $this->maxframes)
+							{
+								$this->fetch($frameurl);
+								$this->_framedepth++;
+							}
+							else
+								break;
+						}
+					}					
+					
+				}
+				else
+				{
+					return false;
+				}
+				return true;					
+				break;
+			case "https":
+				if(!$this->curl_path)
+					return false;
+				if(function_exists("is_executable"))
+				    if (!is_executable($this->curl_path))
+				        return false;
+				$this->host = $URI_PARTS["host"];
+				if(!empty($URI_PARTS["port"]))
+					$this->port = $URI_PARTS["port"];
+				if($this->_isproxy)
+				{
+					// using proxy, send entire URI
+					$this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
+				}
+				else
+				{
+					$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
+					// no proxy, send only the path
+					$this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
+				}
+
+				if($this->_redirectaddr)
+				{
+					/* url was redirected, check if we've hit the max depth */
+					if($this->maxredirs > $this->_redirectdepth)
+					{						
+						if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
+							$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);						
+
+						// only follow redirect if it's on this site, or offsiteok is true
+						if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
+						{
+							/* follow the redirect */
+							$this->_redirectdepth++;
+							$this->lastredirectaddr=$this->_redirectaddr;
+							if( strpos( $this->_redirectaddr, "?" ) > 0 )
+								$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
+							else
+								$this->submit($this->_redirectaddr,$formvars, $formfiles);
+						}
+					}
+				}
+
+				if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
+				{
+					$frameurls = $this->_frameurls;
+					$this->_frameurls = array();
+
+					// foreach instead of each(), which was removed in PHP 8
+					foreach($frameurls as $frameurl)
+					{														
+						if($this->_framedepth < $this->maxframes)
+						{
+							$this->fetch($frameurl);
+							$this->_framedepth++;
+						}
+						else
+							break;
+					}
+				}					
+				return true;					
+				break;
+				
+			default:
+				// not a valid protocol
+				// ("\n" must be double-quoted to become a real line break)
+				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
+				return false;
+				break;
+		}		
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	fetchlinks
+	Purpose:	fetch the links from a web page
+	Input:		$URI	where you are fetching from
+	Output:		$this->results	an array of the URLs
+\*======================================================================*/
+
+	function fetchlinks($URI)
+	{
+		// Fetch the page first; link extraction works on what fetch() left in
+		// $this->results. Returns false when the fetch fails, true otherwise.
+		if (!$this->fetch($URI))
+			return false;
+
+		// After a redirect, relative links are resolved against the final address.
+		if($this->lastredirectaddr)
+			$URI = $this->lastredirectaddr;
+
+		if(is_array($this->results))
+		{
+			// One entry per fetched document. Each document is expanded on its
+			// own (as submitlinks() does): handing the whole nested array to
+			// _expandlinks() would give preg_replace() an array of arrays.
+			for($x=0;$x<count($this->results);$x++)
+			{
+				$this->results[$x] = $this->_striplinks($this->results[$x]);
+				if($this->expandlinks)
+					$this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
+			}
+		}
+		else
+		{
+			$this->results = $this->_striplinks($this->results);
+			if($this->expandlinks)
+				$this->results = $this->_expandlinks($this->results,$URI);
+		}
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	fetchform
+	Purpose:	fetch the form elements from a web page
+	Input:		$URI	where you are fetching from
+	Output:		$this->results	the resulting html form
+\*======================================================================*/
+
+	function fetchform($URI)
+	{
+		// nothing to reduce when the page could not be fetched
+		if (!$this->fetch($URI))
+			return false;
+
+		if(!is_array($this->results))
+		{
+			// single document: keep only its form markup
+			$this->results = $this->_stripform($this->results);
+			return true;
+		}
+
+		// several documents: reduce each one in place
+		$idx = 0;
+		while($idx < count($this->results))
+		{
+			$this->results[$idx] = $this->_stripform($this->results[$idx]);
+			$idx++;
+		}
+		return true;
+	}
+	
+	
+/*======================================================================*\
+	Function:	fetchtext
+	Purpose:	fetch the text from a web page, stripping the links
+	Input:		$URI	where you are fetching from
+	Output:		$this->results	the text from the web page
+\*======================================================================*/
+
+	function fetchtext($URI)
+	{
+		// a failed fetch leaves nothing to convert
+		if(!$this->fetch($URI))
+			return false;
+
+		if(!is_array($this->results))
+		{
+			// single document: replace it with its plain-text form
+			$this->results = $this->_striptext($this->results);
+			return true;
+		}
+
+		// several documents: convert each entry in place
+		$idx = 0;
+		while($idx < count($this->results))
+		{
+			$this->results[$idx] = $this->_striptext($this->results[$idx]);
+			$idx++;
+		}
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	submitlinks
+	Purpose:	grab links from a form submission
+	Input:		$URI	where you are submitting from
+	Output:		$this->results	an array of the links from the post
+\*======================================================================*/
+
+	function submitlinks($URI, $formvars="", $formfiles="")
+	{
+		// submit the form; without a response there are no links to collect
+		if(!$this->submit($URI,$formvars, $formfiles))
+			return false;
+
+		// after a redirect, the final address is the base for link expansion
+		if($this->lastredirectaddr)
+			$URI = $this->lastredirectaddr;
+
+		if(!is_array($this->results))
+		{
+			// single document
+			$this->results = $this->_striplinks($this->results);
+			if($this->expandlinks)
+				$this->results = $this->_expandlinks($this->results,$URI);
+			return true;
+		}
+
+		// several documents: extract (and optionally expand) links per document
+		$idx = 0;
+		while($idx < count($this->results))
+		{
+			$this->results[$idx] = $this->_striplinks($this->results[$idx]);
+			if($this->expandlinks)
+				$this->results[$idx] = $this->_expandlinks($this->results[$idx],$URI);
+			$idx++;
+		}
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	submittext
+	Purpose:	grab text from a form submission
+	Input:		$URI	where you are submitting from
+	Output:		$this->results	the text from the web page
+\*======================================================================*/
+
+	function submittext($URI, $formvars = "", $formfiles = "")
+	{
+		// submit the form; without a response there is no text to extract
+		if(!$this->submit($URI,$formvars, $formfiles))
+			return false;
+
+		// after a redirect, the final address is the base for link expansion
+		if($this->lastredirectaddr)
+			$URI = $this->lastredirectaddr;
+
+		if(!is_array($this->results))
+		{
+			// single document
+			$this->results = $this->_striptext($this->results);
+			if($this->expandlinks)
+				$this->results = $this->_expandlinks($this->results,$URI);
+			return true;
+		}
+
+		// several documents: convert (and optionally expand) each one in place
+		$idx = 0;
+		while($idx < count($this->results))
+		{
+			$this->results[$idx] = $this->_striptext($this->results[$idx]);
+			if($this->expandlinks)
+				$this->results[$idx] = $this->_expandlinks($this->results[$idx],$URI);
+			$idx++;
+		}
+		return true;
+	}
+
+	
+
+/*======================================================================*\
+	Function:	set_submit_multipart
+	Purpose:	Set the form submission content type to
+				multipart/form-data
+\*======================================================================*/
+	function set_submit_multipart()
+	{
+		// _prepare_post_body() only encodes $formfiles under this content type
+		$content_type = "multipart/form-data";
+		$this->_submit_type = $content_type;
+	}
+
+	
+/*======================================================================*\
+	Function:	set_submit_normal
+	Purpose:	Set the form submission content type to
+				application/x-www-form-urlencoded
+\*======================================================================*/
+	function set_submit_normal()
+	{
+		// plain url-encoded key=value pairs
+		$content_type = "application/x-www-form-urlencoded";
+		$this->_submit_type = $content_type;
+	}
+
+	
+	
+
+/*======================================================================*\
+	Private functions
+\*======================================================================*/
+	
+	
+/*======================================================================*\
+	Function:	_striplinks
+	Purpose:	strip the hyperlinks from an html document
+	Input:		$document	document to strip.
+	Output:		$match		an array of the links
+\*======================================================================*/
+
+	function _striplinks($document)
+	{
+		// Collects the href value of every <a ...> tag in $document.
+		// Returns an array of link strings; empty array when there are none.
+		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
+						([\"\'])?					# find single or double quote
+						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
+													# quote, otherwise match up to next space
+						'isx",$document,$links);
+
+		// Start from an empty list so a document without links yields array()
+		// instead of an undefined variable.
+		$match = array();
+
+		// Group 2 holds quoted href values, group 3 unquoted ones. Only one of
+		// the two is filled per match, so the empty counterparts are skipped.
+		// Quoted links are listed before unquoted ones, as before.
+		foreach(array($links[2], $links[3]) as $group)
+		{
+			foreach($group as $val)
+			{
+				if(!empty($val))
+					$match[] = $val;
+			}
+		}
+
+		// return the links
+		return $match;
+	}
+
+/*======================================================================*\
+	Function:	_stripform
+	Purpose:	strip the form elements from an html document
+	Input:		$document	document to strip.
+	Output:		$match		an array of the links
+\*======================================================================*/
+
+	function _stripform($document)
+	{
+		// Pattern for form, input, select, textarea and option tags; for option
+		// tags the text up to the next option/select tag is captured as well.
+		$pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
+		preg_match_all($pattern,$document,$found);
+
+		// the full matches, one per line
+		return implode("\r\n",$found[0]);
+	}
+
+	
+	
+/*======================================================================*\
+	Function:	_striptext
+	Purpose:	strip the text from an html document
+	Input:		$document	document to strip.
+	Output:		$text		the resulting text
+\*======================================================================*/
+
+	function _striptext($document)
+	{
+		// Reduces an HTML document to text: script blocks and tags are removed,
+		// whitespace after line breaks is collapsed, and a fixed list of
+		// entities is replaced by single-byte characters.
+		// The patterns are applied in order, each on the result of the previous one.
+
+		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
+		// so, list your entities one by one here. I included some of the
+		// more common ones.
+
+		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
+						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
+						"'([\r\n])[\s]+'",					// strip out white space
+						"'&(quot|#34|#034|#x22);'i",		// replace html entities
+						"'&(amp|#38|#038|#x26);'i",			// added hexadecimal values
+						"'&(lt|#60|#060|#x3c);'i",
+						"'&(gt|#62|#062|#x3e);'i",
+						"'&(nbsp|#160|#xa0);'i",
+						"'&(iexcl|#161);'i",
+						"'&(cent|#162);'i",
+						"'&(pound|#163);'i",
+						"'&(copy|#169);'i",
+						"'&(reg|#174);'i",
+						"'&(deg|#176);'i",
+						"'&(#39|#039|#x27);'",
+						"'&(euro|#8364);'i",				// europe
+						"'&a(uml|UML);'",					// german
+						"'&o(uml|UML);'",
+						"'&u(uml|UML);'",
+						"'&A(uml|UML);'",
+						"'&O(uml|UML);'",
+						"'&U(uml|UML);'",
+						"'&szlig;'i",
+						);
+		// The German characters are written as chr() byte values, like the
+		// other entries, so the result does not depend on the encoding this
+		// source file happens to be saved in.
+		$replace = array(	"",
+							"",
+							"\\1",
+							"\"",
+							"&",
+							"<",
+							">",
+							" ",
+							chr(161),
+							chr(162),
+							chr(163),
+							chr(169),
+							chr(174),
+							chr(176),
+							chr(39),
+							chr(128),
+							chr(228),	// a umlaut
+							chr(246),	// o umlaut
+							chr(252),	// u umlaut
+							chr(196),	// A umlaut
+							chr(214),	// O umlaut
+							chr(220),	// U umlaut
+							chr(223),	// sharp s
+						);
+
+		$text = preg_replace($search,$replace,$document);
+
+		return $text;
+	}
+
+/*======================================================================*\
+	Function:	_expandlinks
+	Purpose:	expand each link into a fully qualified URL
+	Input:		$links			the links to qualify
+				$URI			the full URI to get the base from
+	Output:		$expandedLinks	the expanded links
+\*======================================================================*/
+
+	function _expandlinks($links,$URI)
+	{
+		// Turns $links (a string, or an array of strings; preg_replace()
+		// accepts both) into absolute URLs using $URI as the base, and
+		// returns the result in the same shape.
+
+		// base = $URI without its query string
+		preg_match("/^[^\?]+/",$URI,$match);
+		$base = isset($match[0]) ? $match[0] : "";
+
+		// Drop a trailing "name.ext" path segment. The pattern is anchored
+		// behind the scheme://host part so that a bare two-label host such as
+		// http://example.com is not mistaken for a file name and cut off.
+		$base = preg_replace("|^(\w+://[^/]+.*)/[^\/\.]+\.[^\/\.]+$|","\\1",$base);
+		// drop a trailing slash
+		$base = preg_replace("|/$|","",$base);
+		$base_parts = parse_url($base);
+		$base_root = $base_parts["scheme"]."://".$base_parts["host"];
+
+		// Applied in order:
+		//  1. strip "http://<current host>" so same-host links become root-relative
+		//  2. root-relative links get the base scheme and host
+		//  3. anything that is not an absolute http/https URL or a mailto: link
+		//     is treated as relative to the base directory
+		//  4. collapse "/./"
+		//  5. collapse one "/dir/../"
+		$search = array( 	"|^http://".preg_quote($this->host,"|")."|i",
+							"|^(\/)|i",
+							"|^(?!https?://)(?!mailto:)|i",
+							"|/\./|",
+							"|/[^\/]+/\.\./|"
+						);
+
+		$replace = array(	"",
+							$base_root."/",
+							$base."/",
+							"/",
+							"/"
+						);
+
+		$expandedLinks = preg_replace($search,$replace,$links);
+
+		return $expandedLinks;
+	}
+
+/*======================================================================*\
+	Function:	_httprequest
+	Purpose:	go get the http data from the server
+	Input:		$url		the url to fetch
+				$fp			the current open file pointer
+				$URI		the full URI
+				$body		body contents to send if any (POST)
+	Output:		
+\*======================================================================*/
+	
+	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
+	{
+		// Sends one request over the already-open socket $fp and reads the
+		// response. Fills $this->headers, $this->status, $this->response_code,
+		// $this->_redirectaddr, $this->_frameurls and $this->results.
+		// Returns true, or false (with status -100) when a read timed out.
+		$cookie_headers = '';
+		// when following a redirect, pick up the cookies of the previous response
+		if($this->passcookies && $this->_redirectaddr)
+			$this->setcookies();
+
+		$URI_PARTS = parse_url($URI);
+		if(empty($url))
+			$url = "/";
+		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
+		if(!empty($this->agent))
+			$headers .= "User-Agent: ".$this->agent."\r\n";
+		// a Host entry in rawheaders takes precedence over the generated one
+		if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
+			$headers .= "Host: ".$this->host;
+			if(!empty($this->port))
+				$headers .= ":".$this->port;
+			$headers .= "\r\n";
+		}
+		if(!empty($this->accept))
+			$headers .= "Accept: ".$this->accept."\r\n";
+		if(!empty($this->referer))
+			$headers .= "Referer: ".$this->referer."\r\n";
+		if(!empty($this->cookies))
+		{
+			if(!is_array($this->cookies))
+				$this->cookies = (array)$this->cookies;
+
+			reset($this->cookies);
+			if ( count($this->cookies) > 0 ) {
+				$cookie_headers .= 'Cookie: ';
+				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
+				$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
+				}
+				// cut the trailing "; "
+				$headers .= substr($cookie_headers,0,-2) . "\r\n";
+			}
+		}
+		if(!empty($this->rawheaders))
+		{
+			if(!is_array($this->rawheaders))
+				$this->rawheaders = (array)$this->rawheaders;
+			// foreach always starts at the first element; the former each()
+			// loop never rewound the array, so raw headers were dropped on
+			// every request after the first (redirects, frames).
+			foreach($this->rawheaders as $headerKey => $headerVal)
+				$headers .= $headerKey.": ".$headerVal."\r\n";
+		}
+		if(!empty($content_type)) {
+			$headers .= "Content-type: $content_type";
+			if ($content_type == "multipart/form-data")
+				$headers .= "; boundary=".$this->_mime_boundary;
+			$headers .= "\r\n";
+		}
+		if(!empty($body))
+			$headers .= "Content-length: ".strlen($body)."\r\n";
+		if(!empty($this->user) || !empty($this->pass))
+			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
+
+		//add proxy auth headers
+		if(!empty($this->proxy_user))
+			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
+
+
+		// blank line ends the header section
+		$headers .= "\r\n";
+
+		// set the read timeout if needed
+		if ($this->read_timeout > 0)
+			socket_set_timeout($fp, $this->read_timeout);
+		$this->timed_out = false;
+
+		fwrite($fp,$headers.$body,strlen($headers.$body));
+
+		$this->_redirectaddr = false;
+		unset($this->headers);
+
+		// read the response headers, one line at a time
+		while($currentHeader = fgets($fp,$this->_maxlinelen))
+		{
+			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
+			{
+				$this->status=-100;
+				return false;
+			}
+
+			// an empty line separates headers from the body
+			if($currentHeader == "\r\n")
+				break;
+
+			// If a header begins with Location: or URI:, set the redirect.
+			// A single match both detects the header and extracts its value,
+			// so $matches is only used when it was actually filled; a header
+			// with no value is ignored.
+			if(preg_match("/^(Location:|URI:)\s*(.+)/i",chop($currentHeader),$matches))
+			{
+				// look for :// in the Location header to see if hostname is included
+				if(!preg_match("|\:\/\/|",$matches[2]))
+				{
+					// no host in the path, so prepend
+					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
+					// eliminate double slash
+					if(!preg_match("|^/|",$matches[2]))
+							$this->_redirectaddr .= "/".$matches[2];
+					else
+							$this->_redirectaddr .= $matches[2];
+				}
+				else
+					$this->_redirectaddr = $matches[2];
+			}
+
+			// status line: remember the numeric code and the raw line
+			if(preg_match("|^HTTP/|",$currentHeader))
+			{
+                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
+				{
+					$this->status= $status[1];
+                }
+				$this->response_code = $currentHeader;
+			}
+
+			$this->headers[] = $currentHeader;
+		}
+
+		// read the body in chunks of at most maxlength bytes until nothing is left
+		$results = '';
+		do {
+    		$_data = fread($fp, $this->maxlength);
+    		if (strlen($_data) == 0) {
+        		break;
+    		}
+    		$results .= $_data;
+		} while(true);
+
+		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
+		{
+			$this->status=-100;
+			return false;
+		}
+
+		// check if there is a a redirect meta tag
+
+		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
+
+		{
+			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
+		}
+
+		// have we hit our frame depth and is there frame src to fetch?
+		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
+		{
+			$this->results[] = $results;
+			for($x=0; $x<count($match[1]); $x++)
+				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
+		}
+		// have we already fetched framed content?
+		elseif(is_array($this->results))
+			$this->results[] = $results;
+		// no framed content
+		else
+			$this->results = $results;
+
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	_httpsrequest
+	Purpose:	go get the https data from the server using curl
+	Input:		$url		the url to fetch
+				$URI		the full URI
+				$body		body contents to send if any (POST)
+	Output:		
+\*======================================================================*/
+	
+	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
+	{
+		// Fetches $URI by running the external curl binary ($this->curl_path)
+		// and parses its output. Fills $this->headers, $this->response_code,
+		// $this->_redirectaddr, $this->_frameurls and $this->results.
+		// Returns true, or false (with $this->error set) when curl exits non-zero.
+		// $url and $http_method are accepted for signature parity with
+		// _httprequest(); curl is always given the full $URI.
+
+		// when following a redirect, pick up the cookies of the previous response
+		if($this->passcookies && $this->_redirectaddr)
+			$this->setcookies();
+
+		$headers = array();
+		$cmdline_params = "";
+
+		$URI_PARTS = parse_url($URI);
+		if(empty($url))
+			$url = "/";
+		// GET ... header not needed for curl
+		//$headers[] = $http_method." ".$url." ".$this->_httpversion;
+		if(!empty($this->agent))
+			$headers[] = "User-Agent: ".$this->agent;
+		if(!empty($this->host))
+		{
+			if(!empty($this->port))
+				$headers[] = "Host: ".$this->host.":".$this->port;
+			else
+				$headers[] = "Host: ".$this->host;
+		}
+		if(!empty($this->accept))
+			$headers[] = "Accept: ".$this->accept;
+		if(!empty($this->referer))
+			$headers[] = "Referer: ".$this->referer;
+		if(!empty($this->cookies))
+		{
+			if(!is_array($this->cookies))
+				$this->cookies = (array)$this->cookies;
+
+			reset($this->cookies);
+			if ( count($this->cookies) > 0 ) {
+				$cookie_str = 'Cookie: ';
+				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
+				$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
+				}
+				// cut the trailing "; "
+				$headers[] = substr($cookie_str,0,-2);
+			}
+		}
+		if(!empty($this->rawheaders))
+		{
+			if(!is_array($this->rawheaders))
+				$this->rawheaders = (array)$this->rawheaders;
+			// foreach always starts at the first element; the former each()
+			// loop never rewound the array, so raw headers were dropped on
+			// every request after the first.
+			foreach($this->rawheaders as $headerKey => $headerVal)
+				$headers[] = $headerKey.": ".$headerVal;
+		}
+		if(!empty($content_type)) {
+			if ($content_type == "multipart/form-data")
+				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
+			else
+				$headers[] = "Content-type: $content_type";
+		}
+		if(!empty($body))
+			$headers[] = "Content-length: ".strlen($body);
+		if(!empty($this->user) || !empty($this->pass))
+			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
+
+		// Every value that ends up on the command line is passed through
+		// escapeshellarg(): headers, body and URI can carry caller- or
+		// server-supplied text and must not be interpreted by the shell.
+		foreach($headers as $header)
+			$cmdline_params .= " -H ".escapeshellarg($header);
+
+		if(!empty($body))
+			$cmdline_params .= " -d ".escapeshellarg($body);
+
+		if($this->read_timeout > 0)
+			$cmdline_params .= " -m ".escapeshellarg($this->read_timeout);
+
+		// curl writes the response headers to this file (-D).
+		// NOTE(review): $this->temp_dir is used when the class defines a
+		// non-empty one (the property is not visible in this part of the
+		// file); otherwise tempnam() picks the system temporary directory.
+		$temp_dir = !empty($this->temp_dir) ? $this->temp_dir : "";
+		$headerfile = tempnam($temp_dir, "sno");
+
+		exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);
+
+		if($return)
+		{
+			// do not leave the header file behind on failure
+			if(file_exists($headerfile))
+				unlink($headerfile);
+			$this->error = "Error: cURL could not retrieve the document, error $return.";
+			return false;
+		}
+
+
+		// exec() returns the output line by line; rebuild the document
+		$results = implode("\r\n",$results);
+
+		$result_headers = file($headerfile);
+		if(!is_array($result_headers))
+			$result_headers = array();
+		// the header file is no longer needed once it has been read
+		if(file_exists($headerfile))
+			unlink($headerfile);
+
+		$this->_redirectaddr = false;
+		unset($this->headers);
+
+		for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
+		{
+
+			// If a header begins with Location: or URI:, set the redirect.
+			// A single case-insensitive match both detects the header and
+			// extracts its value, so $matches is only used when it was filled.
+			if(preg_match("/^(Location:|URI:)\s*(.+)/i",chop($result_headers[$currentHeader]),$matches))
+			{
+				// look for :// in the Location header to see if hostname is included
+				if(!preg_match("|\:\/\/|",$matches[2]))
+				{
+					// no host in the path, so prepend
+					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
+					// eliminate double slash
+					if(!preg_match("|^/|",$matches[2]))
+							$this->_redirectaddr .= "/".$matches[2];
+					else
+							$this->_redirectaddr .= $matches[2];
+				}
+				else
+					$this->_redirectaddr = $matches[2];
+			}
+
+			// status line: remember the raw line
+			if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
+				$this->response_code = $result_headers[$currentHeader];
+
+			$this->headers[] = $result_headers[$currentHeader];
+		}
+
+		// check if there is a a redirect meta tag
+
+		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
+		{
+			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
+		}
+
+		// have we hit our frame depth and is there frame src to fetch?
+		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
+		{
+			$this->results[] = $results;
+			for($x=0; $x<count($match[1]); $x++)
+				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
+		}
+		// have we already fetched framed content?
+		elseif(is_array($this->results))
+			$this->results[] = $results;
+		// no framed content
+		else
+			$this->results = $results;
+
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	setcookies()
+	Purpose:	set cookies for a redirection
+\*======================================================================*/
+	
+	function setcookies()
+	{
+		// Copies every Set-Cookie name/value pair found in the stored response
+		// headers into $this->cookies (url-decoded), overwriting same-named entries.
+
+		// the headers are unset before each response is read; nothing to do then
+		if(empty($this->headers) || !is_array($this->headers))
+			return;
+
+		foreach($this->headers as $header)
+		{
+			// Stored header lines still end in their line break, so the value
+			// stops at ";" or at the end of the line, and trailing blanks are
+			// trimmed; otherwise the line break would become part of the cookie.
+			if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header,$match))
+				$this->cookies[$match[1]] = urldecode(rtrim($match[2]));
+		}
+	}
+
+	
+/*======================================================================*\
+	Function:	_check_timeout
+	Purpose:	checks whether timeout has occurred
+	Input:		$fp	file pointer
+\*======================================================================*/
+
+	function _check_timeout($fp)
+	{
+		// without a configured read timeout a timeout is never reported
+		if (!($this->read_timeout > 0))
+			return false;
+
+		$stream_info = socket_get_status($fp);
+		if (!$stream_info["timed_out"])
+			return false;
+
+		// remember the timeout on the object as well as reporting it
+		$this->timed_out = true;
+		return true;
+	}
+
+/*======================================================================*\
+	Function:	_connect
+	Purpose:	make a socket connection
+	Input:		$fp	file pointer
+\*======================================================================*/
+	
+	function _connect(&$fp)
+	{
+		// Opens a socket and stores it in $fp (passed by reference).
+		// Connects to the proxy when both proxy_host and proxy_port are set,
+		// otherwise to $this->host:$this->port.
+		// Returns true on success; on failure returns false with
+		// $this->status set to the socket error number and $this->error
+		// set to a message.
+		if(!empty($this->proxy_host) && !empty($this->proxy_port))
+		{
+			$this->_isproxy = true;
+
+			$host = $this->proxy_host;
+			$port = $this->proxy_port;
+		}
+		else
+		{
+			$host = $this->host;
+			$port = $this->port;
+		}
+
+		$this->status = 0;
+
+		$fp = fsockopen(
+					$host,
+					$port,
+					$errno,
+					$errstr,
+					$this->_fp_timeout
+					);
+		if($fp)
+		{
+			// socket connection succeeded
+			return true;
+		}
+
+		// socket connection failed
+		$this->status = $errno;
+		// Each case ends with break: without it every error fell through to
+		// the default branch and the specific messages were never reported.
+		switch($errno)
+		{
+			case -3:
+				$this->error="socket creation failed (-3)";
+				break;
+			case -4:
+				$this->error="dns lookup failure (-4)";
+				break;
+			case -5:
+				$this->error="connection refused or timed out (-5)";
+				break;
+			default:
+				$this->error="connection failed (".$errno.")";
+				break;
+		}
+		return false;
+	}
+/*======================================================================*\
+	Function:	_disconnect
+	Purpose:	disconnect a socket connection
+	Input:		$fp	file pointer
+\*======================================================================*/
+	
+	function _disconnect($fp)
+	{
+		// hand back fclose()'s own result
+		$closed = fclose($fp);
+		return $closed;
+	}
+
+	
+/*======================================================================*\
+	Function:	_prepare_post_body
+	Purpose:	Prepare post body according to encoding type
+	Input:		$formvars  - form variables
+				$formfiles - form upload files
+	Output:		post body
+\*======================================================================*/
+	
+	function _prepare_post_body($formvars, $formfiles)
+	{
+		// Builds the request body for a form submission according to
+		// $this->_submit_type.
+		//   $formvars  - field name => value, or field name => list of values
+		//   $formfiles - field name => file name, or field name => list of file names
+		//                (only used for multipart/form-data)
+		// Returns the body as a string; empty string when there is nothing to send.
+		// For multipart bodies a fresh boundary is stored in $this->_mime_boundary.
+		settype($formvars, "array");
+		settype($formfiles, "array");
+		$postdata = '';
+
+		if (count($formvars) == 0 && count($formfiles) == 0)
+			return $postdata;
+
+		switch ($this->_submit_type) {
+			case "application/x-www-form-urlencoded":
+				foreach($formvars as $key => $val) {
+					if (is_array($val) || is_object($val)) {
+						// multi-valued field: repeated as key[]=value
+						foreach ($val as $cur_val) {
+							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
+						}
+					} else
+						$postdata .= urlencode($key)."=".urlencode($val)."&";
+				}
+				break;
+
+			case "multipart/form-data":
+				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
+
+				foreach($formvars as $key => $val) {
+					if (is_array($val) || is_object($val)) {
+						foreach ($val as $cur_val) {
+							$postdata .= "--".$this->_mime_boundary."\r\n";
+							// The brackets are appended by concatenation: "\[" is not
+							// an escape sequence in a double-quoted string, so the
+							// former "$key\[\]" put literal backslashes into the name.
+							$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
+							$postdata .= "$cur_val\r\n";
+						}
+					} else {
+						$postdata .= "--".$this->_mime_boundary."\r\n";
+						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
+						$postdata .= "$val\r\n";
+					}
+				}
+
+				foreach ($formfiles as $field_name => $file_names) {
+					settype($file_names, "array");
+					foreach ($file_names as $file_name) {
+						// unreadable files are skipped silently
+						if (!is_readable($file_name)) continue;
+
+						$fp = fopen($file_name, "rb");
+						if (!$fp) continue;
+						// fread() must not be asked for zero bytes, so an empty
+						// file is sent as an empty part
+						$file_size = filesize($file_name);
+						$file_content = ($file_size > 0) ? fread($fp, $file_size) : "";
+						fclose($fp);
+						$base_name = basename($file_name);
+
+						$postdata .= "--".$this->_mime_boundary."\r\n";
+						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
+						$postdata .= "$file_content\r\n";
+					}
+				}
+				// closing boundary
+				$postdata .= "--".$this->_mime_boundary."--\r\n";
+				break;
+		}
+
+		return $postdata;
+	}
+}
+
+?>
author	Jonas John <jonas@jonasjohn.de>	2012-02-09 13:42:59 +0400
committer	Jonas John <jonas@jonasjohn.de>	2012-02-09 13:42:59 +0400
commit	a44eee9e97a7851596e26de2c9e53fa569df7389 (patch)
tree	27904364de31aad4ddfab8bc006261fc6a382382