这个是用爬虫技术爬取用户数据、获取网上资源实现的吗?


C# WPF – 网络资源爬虫
作者心得:真没你想的那么高级,这算是我第一个与 HTTP 有交互的玩意儿吧。运用到了:正则表达式。(后来代码被我拿去做研究了,貌似改动过。)
源代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;

namespace TQJ
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml.
    /// Each button handler downloads the page whose address is typed into the
    /// <c>www</c> input, extracts data from it with a regular expression, shows
    /// the matches in <c>TextBox</c> and (for phone/e-mail/address) appends them
    /// to a text file under <c>e:\data</c>.
    /// </summary>
    public partial class MainWindow : Window
    {
        /// <summary>Directory that receives the extracted result files.</summary>
        private const string DataDirectory = @"e:\data";

        // The patterns are compiled once instead of on every click.
        private static readonly Regex FourDigitPattern =
            new Regex(@"[0-9]{4}", RegexOptions.IgnoreCase);

        private static readonly Regex PhonePattern =
            new Regex(@"(134|135|136|137|138|139|150|151|152|157|158|159|147|182|183|184|187|188|170|178|130|131|132|145|155|156|185|186|176|170|171|166|133|153|180|181|189|170|177|173)(\b)?[0-9]{8}");

        private static readonly Regex EmailPattern =
            new Regex(@"([A-Za-z0-9\u4e00-\u9fa5]+)@([a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+)");

        private static readonly Regex AddressPattern =
            new Regex(@"(.+)\[port=([0-9]{2,5})(,type=(.+))?\]");

        /// <summary>Address most recently read from the <c>www</c> input.</summary>
        public string wwwtext;

        /// <summary>
        /// Address captured by <see cref="Button_Click_4"/> just before
        /// <c>Window1</c> is shown. Presumably read by <c>Window1</c> — verify there.
        /// </summary>
        public static string textclass;

        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>Ensures the output directory exists when the window is initialized.</summary>
        private void Window_Initialized(object sender, EventArgs e)
        {
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(DataDirectory);
        }

        /// <summary>
        /// Encodes <paramref name="unicodeString"/> to UTF-8 bytes and decodes it
        /// back to a string.
        /// </summary>
        /// <param name="unicodeString">Text to round-trip through UTF-8.</param>
        /// <returns>The decoded string.</returns>
        public static string get_uft8(string unicodeString)
        {
            UTF8Encoding utf8 = new UTF8Encoding();
            return utf8.GetString(utf8.GetBytes(unicodeString));
        }

        /// <summary>
        /// "Extract images" handler: downloads the page, shows its HTML and every
        /// four-digit token found in it.
        /// </summary>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            wwwtext = www.Text;
            try
            {
                string html;
                using (WebClient client = new WebClient())
                {
                    // The original passed an empty address here, which always threw
                    // and landed in the catch block; use the address the user typed,
                    // like every other handler does.
                    byte[] pageData = client.DownloadData(wwwtext);
                    html = Encoding.UTF8.GetString(pageData);
                }

                // NOTE(review): the original also built
                // "http://120.77.214.11/code.ajx?t=<token>" per match, but the actual
                // image download was commented out, so only the tokens are listed.
                StringBuilder output = new StringBuilder(html);
                foreach (Match item in FourDigitPattern.Matches(html))
                {
                    output.Append(item.Value);
                }

                TextBox.Text += output.ToString();
                MessageBox.Show("提取成功!", "提示!");
            }
            catch (Exception ex)
            {
                ReportFailure(ex, "该网站已加密,请换一个试试");
            }
        }

        /// <summary>"Extract phone numbers" handler; results go to phone.txt.</summary>
        private void Button_Click_1(object sender, RoutedEventArgs e)
        {
            ExtractToFile("phone.txt", PhonePattern, DescribePhone, "该网站不存在电话号码,请换一个试试");
        }

        /// <summary>Extracts e-mail addresses; results go to email.txt.</summary>
        private void Button_Click_2(object sender, RoutedEventArgs e)
        {
            ExtractToFile("email.txt", EmailPattern, DescribeEmail, "该网站不存在邮箱,请换一个试试");
        }

        /// <summary>Extracts "host[port=N]" style entries; results go to address.txt.</summary>
        private void Button_Click_3(object sender, RoutedEventArgs e)
        {
            ExtractToFile("address.txt", AddressPattern, DescribeAddress, "该网站不存在地址,请换一个试试");
        }

        /// <summary>Remembers the typed address, hides this window and opens <c>Window1</c>.</summary>
        private void Button_Click_4(object sender, RoutedEventArgs e)
        {
            textclass = www.Text;
            this.Hide();
            Window1 next = new Window1();
            next.Show();
        }

        /// <summary>Opens <c>Form1</c>.</summary>
        private void Button_Click_5(object sender, RoutedEventArgs e)
        {
            Form1 form1 = new Form1();
            form1.Show();
        }

        /// <summary>Replaces the output text with two upper-cased GUIDs, each prefixed by "_".</summary>
        private void Button_Click_6(object sender, RoutedEventArgs e)
        {
            TextBox.Text = ("_" + System.Guid.NewGuid().ToString() + "_" + System.Guid.NewGuid().ToString()).ToUpper();
        }

        /// <summary>
        /// Shared implementation of the phone/e-mail/address handlers: downloads
        /// the page at the typed address, formats every match of
        /// <paramref name="pattern"/>, appends the lines to
        /// <paramref name="fileName"/> under the data directory and to the output
        /// text, then reports success or <paramref name="failureMessage"/>.
        /// </summary>
        /// <param name="fileName">Result file name inside the data directory.</param>
        /// <param name="pattern">Regular expression applied to the downloaded page.</param>
        /// <param name="describe">Turns one match into the line that is written and shown.</param>
        /// <param name="failureMessage">Message shown when anything in the process throws.</param>
        private void ExtractToFile(string fileName, Regex pattern, Func<Match, string> describe, string failureMessage)
        {
            wwwtext = www.Text;
            try
            {
                string html;
                using (WebClient client = new WebClient())
                {
                    html = client.DownloadString(wwwtext);
                }

                // Collect the display text once instead of re-concatenating the whole
                // TextBox content for every match.
                StringBuilder shown = new StringBuilder();

                // The file is opened only after a successful download, inside the try,
                // so a missing drive or locked file is reported instead of crashing the
                // handler, and the writer is always closed.
                Directory.CreateDirectory(DataDirectory);
                using (StreamWriter writer = new StreamWriter(Path.Combine(DataDirectory, fileName), true))
                {
                    foreach (Match item in pattern.Matches(html))
                    {
                        string line = describe(item);
                        writer.WriteLine(line);
                        shown.Append("\r\n").Append(line);
                    }
                }

                TextBox.Text += shown.ToString();
                MessageBox.Show("提取成功!", "提示!");
            }
            catch (Exception ex)
            {
                ReportFailure(ex, failureMessage);
            }
        }

        /// <summary>Traces the swallowed exception for debugging and shows the user-facing message.</summary>
        private static void ReportFailure(Exception error, string userMessage)
        {
            System.Diagnostics.Debug.WriteLine(error);
            MessageBox.Show(userMessage);
        }

        /// <summary>A phone match is reported verbatim.</summary>
        private static string DescribePhone(Match match)
        {
            return match.Value;
        }

        /// <summary>Splits an e-mail match into its user-name and domain groups.</summary>
        private static string DescribeEmail(Match match)
        {
            return "用户名:" + match.Groups[1].Value + ";域名:" + match.Groups[2].Value;
        }

        /// <summary>Reports the text before "[port=" as the IP and the captured digits as the port.</summary>
        private static string DescribeAddress(Match match)
        {
            return "IP:" + match.Groups[1].Value + ";port:" + match.Groups[2].Value;
        }
    }
}
效果截图
作者的话:这好像是我最后一个 WinForm 项目了,接下来应该会转向 ASP.NET 项目。虽然这两者关系不大,但是 WinForm 作为 C# 入门真的很有帮助!

我要回帖

更多关于 爬虫如何爬取用户数据 的文章

 

随机推荐