<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="ru">
		<id>http://nfex.ru/index.php?action=history&amp;feed=atom&amp;title=Csharp%2FC_Sharp%2FNetwork%2FWeb_Crawler</id>
		<title>Csharp/C Sharp/Network/Web Crawler - История изменений</title>
		<link rel="self" type="application/atom+xml" href="http://nfex.ru/index.php?action=history&amp;feed=atom&amp;title=Csharp%2FC_Sharp%2FNetwork%2FWeb_Crawler"/>
		<link rel="alternate" type="text/html" href="http://nfex.ru/index.php?title=Csharp/C_Sharp/Network/Web_Crawler&amp;action=history"/>
		<updated>2026-04-29T19:52:14Z</updated>
		<subtitle>История изменений этой страницы в вики</subtitle>
		<generator>MediaWiki 1.30.0</generator>

	<entry>
		<id>http://nfex.ru/index.php?title=Csharp/C_Sharp/Network/Web_Crawler&amp;diff=1483&amp;oldid=prev</id>
		<title> в 15:31, 26 мая 2010</title>
		<link rel="alternate" type="text/html" href="http://nfex.ru/index.php?title=Csharp/C_Sharp/Network/Web_Crawler&amp;diff=1483&amp;oldid=prev"/>
				<updated>2010-05-26T15:31:19Z</updated>
		
		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;tr style=&quot;vertical-align: top;&quot; lang=&quot;ru&quot;&gt;
				&lt;td colspan=&quot;1&quot; style=&quot;background-color: white; color:black; text-align: center;&quot;&gt;← Предыдущая&lt;/td&gt;
				&lt;td colspan=&quot;1&quot; style=&quot;background-color: white; color:black; text-align: center;&quot;&gt;Версия 15:31, 26 мая 2010&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; style=&quot;text-align: center;&quot; lang=&quot;ru&quot;&gt;&lt;div class=&quot;mw-diff-empty&quot;&gt;(нет различий)&lt;/div&gt;
&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;</summary>
			</entry>

	<entry>
		<id>http://nfex.ru/index.php?title=Csharp/C_Sharp/Network/Web_Crawler&amp;diff=1484&amp;oldid=prev</id>
		<title>Admin: 1 версия</title>
		<link rel="alternate" type="text/html" href="http://nfex.ru/index.php?title=Csharp/C_Sharp/Network/Web_Crawler&amp;diff=1484&amp;oldid=prev"/>
				<updated>2010-05-26T11:47:09Z</updated>
		
		<summary type="html">&lt;p&gt;1 версия&lt;/p&gt;
&lt;p&gt;&lt;b&gt;Новая страница&lt;/b&gt;&lt;/p&gt;&lt;div&gt;==Build the DownloadString==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
using System;&lt;br /&gt;
using System.IO;&lt;br /&gt;
using System.Net;&lt;br /&gt;
using System.Text.RegularExpressions;&lt;br /&gt;
class MainClass {&lt;br /&gt;
    private static void Main() {&lt;br /&gt;
        string remoteUri = &amp;quot;http://www.apress.ru&amp;quot;;&lt;br /&gt;
        WebClient client = new WebClient();&lt;br /&gt;
        string str = client.DownloadString(remoteUri);&lt;br /&gt;
        MatchCollection matches = Regex.Matches(str, @&amp;quot;http\S+[^-,;:?]\.gif&amp;quot;);&lt;br /&gt;
        foreach (Match match in matches) {&lt;br /&gt;
            foreach (Group grp in match.Groups) {&lt;br /&gt;
                string file = grp.Value.Substring(grp.Value.LastIndexOf(&amp;quot;/&amp;quot;) + 1);&lt;br /&gt;
                try {&lt;br /&gt;
                    Console.WriteLine(&amp;quot;Downloading {0} to file {1}&amp;quot;, grp.Value, file);&lt;br /&gt;
                    client.DownloadFile(new Uri(grp.Value), file);&lt;br /&gt;
                } catch {&lt;br /&gt;
                    Console.WriteLine(&amp;quot;Failed to download {0}&amp;quot;, grp.Value);&lt;br /&gt;
                }&lt;br /&gt;
            }&lt;br /&gt;
        }&lt;br /&gt;
    }&lt;br /&gt;
}&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Check the ContentType==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
&lt;br /&gt;
using System;&lt;br /&gt;
using System.IO;&lt;br /&gt;
using System.Net;&lt;br /&gt;
   &lt;br /&gt;
class HtmlDump&lt;br /&gt;
{&lt;br /&gt;
     public static int Main(string[] astrArgs)&lt;br /&gt;
     {&lt;br /&gt;
          WebRequest webreq;&lt;br /&gt;
          WebResponse webres;&lt;br /&gt;
   &lt;br /&gt;
          try&lt;br /&gt;
          {&lt;br /&gt;
               webreq = WebRequest.Create(&amp;quot;http://www.nfex.ru/&amp;quot;);&lt;br /&gt;
               webres = webreq.GetResponse();&lt;br /&gt;
          }&lt;br /&gt;
          catch (Exception exc)&lt;br /&gt;
          {&lt;br /&gt;
               Console.WriteLine(&amp;quot;HtmlDump: {0}&amp;quot;, exc.Message);&lt;br /&gt;
               return 1;&lt;br /&gt;
          }&lt;br /&gt;
   &lt;br /&gt;
          if (webres.ContentType.Substring(0, 4) != &amp;quot;text&amp;quot;)&lt;br /&gt;
          {&lt;br /&gt;
               Console.WriteLine(&amp;quot;HtmlDump: URI must be a text type.&amp;quot;);&lt;br /&gt;
               return 1;&lt;br /&gt;
          }&lt;br /&gt;
   &lt;br /&gt;
          Stream       stream = webres.GetResponseStream();&lt;br /&gt;
          StreamReader strrdr = new StreamReader(stream);&lt;br /&gt;
          string       strLine;&lt;br /&gt;
   &lt;br /&gt;
          while ((strLine = strrdr.ReadLine()) != null){&lt;br /&gt;
               Console.WriteLine(strLine);&lt;br /&gt;
          }&lt;br /&gt;
          stream.Close();&lt;br /&gt;
          return 0;&lt;br /&gt;
     }&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Create GetResponse from WebRequest==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
using System;&lt;br /&gt;
using System.Net;&lt;br /&gt;
using System.IO;&lt;br /&gt;
using System.Drawing;&lt;br /&gt;
using System.Windows.Forms;&lt;br /&gt;
public class MainClass {&lt;br /&gt;
    public static void Main() {&lt;br /&gt;
        string picUri = &amp;quot;http://www.apress.ru/img/img05/Hex_RGB4.jpg&amp;quot;;&lt;br /&gt;
        string htmlUri = &amp;quot;http://www.apress.ru&amp;quot;;&lt;br /&gt;
        WebRequest requestPic = WebRequest.Create(picUri);&lt;br /&gt;
        WebRequest requestHtml = WebRequest.Create(htmlUri);&lt;br /&gt;
        WebResponse responsePic = requestPic.GetResponse();&lt;br /&gt;
        WebResponse responseHtml = requestHtml.GetResponse();&lt;br /&gt;
        Image img = Image.FromStream(responsePic.GetResponseStream());&lt;br /&gt;
        using (StreamReader r = new StreamReader(responseHtml.GetResponseStream())) {&lt;br /&gt;
            Console.WriteLine(r.ReadToEnd());&lt;br /&gt;
        }&lt;br /&gt;
    }&lt;br /&gt;
}&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Download a web page in a thread==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
using System;&lt;br /&gt;
using System.Net;&lt;br /&gt;
using System.Threading;&lt;br /&gt;
class ThreadTest {&lt;br /&gt;
    static void Main() {&lt;br /&gt;
        new Thread(Download).Start();&lt;br /&gt;
        Console.WriteLine(&amp;quot;download's happening!&amp;quot;);&lt;br /&gt;
        Console.ReadLine();&lt;br /&gt;
    }&lt;br /&gt;
    static void Download() {&lt;br /&gt;
        using (WebClient wc = new WebClient())&lt;br /&gt;
            try {&lt;br /&gt;
                wc.Proxy = null;&lt;br /&gt;
                wc.DownloadFile(&amp;quot;http://www.google.ru&amp;quot;, &amp;quot;index.html&amp;quot;);&lt;br /&gt;
                Console.WriteLine(&amp;quot;Finished!&amp;quot;);&lt;br /&gt;
            } catch (Exception ex) {&lt;br /&gt;
            }&lt;br /&gt;
    }&lt;br /&gt;
}&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==MiniCrawler: A skeletal Web crawler==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
/*&lt;br /&gt;
C#: The Complete Reference &lt;br /&gt;
by Herbert Schildt &lt;br /&gt;
Publisher: Osborne/McGraw-Hill (March 8, 2002)&lt;br /&gt;
ISBN: 0072134852&lt;br /&gt;
*/&lt;br /&gt;
&lt;br /&gt;
// MiniCrawler: A skeletal Web crawler. &lt;br /&gt;
 &lt;br /&gt;
using System; &lt;br /&gt;
using System.Net; &lt;br /&gt;
using System.IO; &lt;br /&gt;
 &lt;br /&gt;
public class MiniCrawler {  &lt;br /&gt;
 &lt;br /&gt;
  // Find a link in a content string. &lt;br /&gt;
  static string FindLink(string htmlstr,  &lt;br /&gt;
                         ref int startloc) { &lt;br /&gt;
    int i; &lt;br /&gt;
    int start, end; &lt;br /&gt;
    string uri = null; &lt;br /&gt;
    string lowcasestr = htmlstr.ToLower(); &lt;br /&gt;
 &lt;br /&gt;
    i = lowcasestr.IndexOf(&amp;quot;href=\&amp;quot;http&amp;quot;, startloc); &lt;br /&gt;
    if(i != -1) { &lt;br /&gt;
      start = htmlstr.IndexOf('&amp;quot;', i) + 1; &lt;br /&gt;
      end = htmlstr.IndexOf('&amp;quot;', start); &lt;br /&gt;
      uri = htmlstr.Substring(start, end-start); &lt;br /&gt;
      startloc = end; &lt;br /&gt;
    } &lt;br /&gt;
             &lt;br /&gt;
    return uri; &lt;br /&gt;
  } &lt;br /&gt;
 &lt;br /&gt;
  public static void Main(string[] args) { &lt;br /&gt;
    string link = null; &lt;br /&gt;
    string str; &lt;br /&gt;
    string answer; &lt;br /&gt;
 &lt;br /&gt;
    int curloc; // holds current location in response &lt;br /&gt;
 &lt;br /&gt;
    if(args.Length != 1) { &lt;br /&gt;
      Console.WriteLine(&amp;quot;Usage: MiniCrawler &amp;lt;uri&amp;gt;&amp;quot;); &lt;br /&gt;
      return ; &lt;br /&gt;
    } &lt;br /&gt;
 &lt;br /&gt;
    string uristr = args[0]; // holds current URI &lt;br /&gt;
 &lt;br /&gt;
    try { &lt;br /&gt;
 &lt;br /&gt;
      do { &lt;br /&gt;
        Console.WriteLine(&amp;quot;Linking to &amp;quot; + uristr); &lt;br /&gt;
 &lt;br /&gt;
        /* Create a WebRequest to the specified URI. */&lt;br /&gt;
        HttpWebRequest req = (HttpWebRequest) &lt;br /&gt;
               WebRequest.Create(uristr); &lt;br /&gt;
 &lt;br /&gt;
        uristr = null; // disallow further use of this URI &lt;br /&gt;
 &lt;br /&gt;
        // Send that request and return the response. &lt;br /&gt;
        HttpWebResponse resp = (HttpWebResponse) &lt;br /&gt;
               req.GetResponse(); &lt;br /&gt;
 &lt;br /&gt;
        // From the response, obtain an input stream. &lt;br /&gt;
        Stream istrm = resp.GetResponseStream(); &lt;br /&gt;
 &lt;br /&gt;
        // Wrap the input stream in a StreamReader. &lt;br /&gt;
        StreamReader rdr = new StreamReader(istrm); &lt;br /&gt;
 &lt;br /&gt;
        // Read in the entire page. &lt;br /&gt;
        str = rdr.ReadToEnd(); &lt;br /&gt;
 &lt;br /&gt;
        curloc = 0; &lt;br /&gt;
        &lt;br /&gt;
        do { &lt;br /&gt;
          // Find the next URI to link to. &lt;br /&gt;
          link = FindLink(str, ref curloc); &lt;br /&gt;
 &lt;br /&gt;
          if(link != null) { &lt;br /&gt;
            Console.WriteLine(&amp;quot;Link found: &amp;quot; + link); &lt;br /&gt;
 &lt;br /&gt;
            Console.Write(&amp;quot;Link, More, Quit?&amp;quot;); &lt;br /&gt;
            answer = Console.ReadLine(); &lt;br /&gt;
 &lt;br /&gt;
            if(string.Compare(answer, &amp;quot;L&amp;quot;, true) == 0) { &lt;br /&gt;
              uristr = string.Copy(link); &lt;br /&gt;
              break; &lt;br /&gt;
            } else if(string.Compare(answer, &amp;quot;Q&amp;quot;, true) == 0) { &lt;br /&gt;
              break; &lt;br /&gt;
            } else if(string.Compare(answer, &amp;quot;M&amp;quot;, true) == 0) { &lt;br /&gt;
              Console.WriteLine(&amp;quot;Searching for another link.&amp;quot;); &lt;br /&gt;
            } &lt;br /&gt;
          } else { &lt;br /&gt;
            Console.WriteLine(&amp;quot;No link found.&amp;quot;); &lt;br /&gt;
            break; &lt;br /&gt;
          } &lt;br /&gt;
 &lt;br /&gt;
        } while(link.Length &amp;gt; 0); &lt;br /&gt;
 &lt;br /&gt;
        // Close the Response. &lt;br /&gt;
        resp.Close(); &lt;br /&gt;
      } while(uristr != null); &lt;br /&gt;
 &lt;br /&gt;
    } catch(WebException exc) { &lt;br /&gt;
      Console.WriteLine(&amp;quot;Network Error: &amp;quot; + exc.Message +  &lt;br /&gt;
                        &amp;quot;\nStatus code: &amp;quot; + exc.Status); &lt;br /&gt;
    } catch(ProtocolViolationException exc) { &lt;br /&gt;
      Console.WriteLine(&amp;quot;Protocol Error: &amp;quot; + exc.Message); &lt;br /&gt;
    } catch(UriFormatException exc) { &lt;br /&gt;
      Console.WriteLine(&amp;quot;URI Format Error: &amp;quot; + exc.Message); &lt;br /&gt;
    } catch(NotSupportedException exc) { &lt;br /&gt;
      Console.WriteLine(&amp;quot;Unknown Protocol: &amp;quot; + exc.Message); &lt;br /&gt;
    } catch(IOException exc) { &lt;br /&gt;
      Console.WriteLine(&amp;quot;I/O Error: &amp;quot; + exc.Message); &lt;br /&gt;
    } &lt;br /&gt;
 &lt;br /&gt;
    Console.WriteLine(&amp;quot;Terminating MiniCrawler.&amp;quot;); &lt;br /&gt;
  } &lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Output webpage content==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
&lt;br /&gt;
using System.Net;&lt;br /&gt;
using System;&lt;br /&gt;
using System.IO;&lt;br /&gt;
public class WebPagesApp {&lt;br /&gt;
    [STAThread]&lt;br /&gt;
    public static void Main(string[] args) {&lt;br /&gt;
        string s = &amp;quot;http://www.microsoft.ru&amp;quot;;&lt;br /&gt;
        Uri uri = new Uri(s);&lt;br /&gt;
        WebRequest req = WebRequest.Create(uri);&lt;br /&gt;
        WebResponse resp = req.GetResponse();&lt;br /&gt;
        Stream str = resp.GetResponseStream();&lt;br /&gt;
        StreamReader sr = new StreamReader(str);&lt;br /&gt;
        string t = sr.ReadToEnd();&lt;br /&gt;
        int i = t.IndexOf(&amp;quot;&amp;lt;HEAD&amp;gt;&amp;quot;);&lt;br /&gt;
        int j = t.IndexOf(&amp;quot;&amp;lt;/HEAD&amp;gt;&amp;quot;);&lt;br /&gt;
        string u = t.Substring(i, j);&lt;br /&gt;
        Console.WriteLine(&amp;quot;{0}&amp;quot;, u);&lt;br /&gt;
    }&lt;br /&gt;
}&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Set the BaseAddress for WebClient==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
&amp;lt;source lang=&amp;quot;csharp&amp;quot;&amp;gt;&lt;br /&gt;
using System;&lt;br /&gt;
using System.Collections.Generic;&lt;br /&gt;
using System.Text;&lt;br /&gt;
using System.Net;&lt;br /&gt;
class Program {&lt;br /&gt;
    static void Main(string[] args) {&lt;br /&gt;
        WebClient client = new WebClient();&lt;br /&gt;
        client.BaseAddress = &amp;quot;http://www.microsoft.ru&amp;quot;;&lt;br /&gt;
        string data = client.DownloadString(&amp;quot;Office&amp;quot;);&lt;br /&gt;
        Console.WriteLine(data);&lt;br /&gt;
    }&lt;br /&gt;
}&lt;br /&gt;
           &lt;br /&gt;
       &amp;lt;/source&amp;gt;&lt;/div&gt;</summary>
		<author><name>Admin</name></author>	</entry>

	</feed>