1 // The following is part of the code
// NOTE(review): the generic type arguments on the two List declarations look like they
// were stripped when this snippet was pasted. Both lists are later queried/filled with
// string URLs (Contains/Add), so List<string> is presumably intended — confirm.
// Weburllist: consulted (never modified in the visible code) when deciding whether a URL is new.
2 List Weburllist = new List();
// Weburllistzx: receives each URL that was found in neither list.
3 List Weburllistzx = new List();
// Accumulates every extracted URL, each wrapped as "$-$" + url + "$_$".
4 StringBuilder weburlSB = new StringBuilder();
// Becomes true as soon as one URL is found that is in neither list.
5 bool IsGenxin = false;
// NOTE(review): the pattern below appears truncated by the paste (original lines 7-45
// are missing and "(?]*>" is not a valid regular expression) — restore it from the
// original source. mcexplain is not used in the visible code.
6 MatchCollection mcexplain = Regex.Matches(sjurlDR["LinkContent"].ToString(), @"(?]*>", RegexOptions.Singleline);
// For every match in mc (built outside this fragment): collect the absolute URLs it
// contains, append each one to weburlSB wrapped as "$-$" + url + "$_$", and register
// any URL that is in neither Weburllist nor Weburllistzx as new (IsGenxin = true).
int Index = 1;
foreach (Match m in mc)
{
    // Double quotes are normalised to single quotes so that [^'] stops at the closing quote.
    // The scheme class is [a-zA-Z]; the previous [a-zA-z] range also matched [ \ ] ^ _ and `.
    MatchCollection urlMatches = Regex.Matches(m.Value.Replace("\"", "'"), @"[a-zA-Z]+://[^']*", RegexOptions.Singleline);

    if (urlMatches.Count == 0)
    {
        // No absolute URL in this match. Unless it is a javascript: pseudo-link
        // (compared case-insensitively so "JavaScript:" is skipped too), treat the
        // href as relative and prefix it with the directory of the page URL.
        if (m.Value.IndexOf("javascript", System.StringComparison.OrdinalIgnoreCase) == -1)
        {
            string pageUrl = sjurlDR["LinkUrl"].ToString();
            // Everything up to and including the last '/'; empty when there is no '/'.
            string wangzhanxiangduilujin = pageUrl.Substring(0, pageUrl.LastIndexOf('/') + 1);
            string absoluteTag = m.Value
                .Replace("href=\"", "href=\"" + wangzhanxiangduilujin)
                .Replace("href='", "href='" + wangzhanxiangduilujin);
            urlMatches = Regex.Matches(absoluteTag, @"[a-zA-Z]+://[^\s]*", RegexOptions.Singleline);
        }
        // Otherwise urlMatches stays empty and the loop below does nothing.
    }

    foreach (Match urlMatch in urlMatches)
    {
        // Strip quote, '>' and ';' characters captured along with the URL.
        string linkurlstr = urlMatch.Value.Replace("\"", "").Replace("'", "").Replace(">", "").Replace(";", "");

        weburlSB.Append("$-$");
        weburlSB.Append(linkurlstr);
        weburlSB.Append("$_$");

        if (!Weburllist.Contains(linkurlstr) && !Weburllistzx.Contains(linkurlstr))
        {
            IsGenxin = true;
            Weburllistzx.Add(linkurlstr);
            linkSb.AppendFormat("{0}", linkurlstr);
        }
    }

    Index++;
}

// Fixed one-second pause before the update step (presumably request throttling — confirm).
System.Threading.Thread.Sleep(1000);

if (IsGenxin)
{
    // Load the stored record, move its current link content to LinkContentnext and
    // store the freshly collected URL list in LinkContent.
    originlinksInfo oinfo = originlinksLogic.Get(int.Parse(sjurlDR["ID"].ToString()));
    oinfo.LinkContentnext = oinfo.LinkContent;
    oinfo.LinkContent = weburlSB.ToString();
    originlinksLogic.Update(oinfo);
    System.Threading.Thread.Sleep(2000);
}
105
106 // For example, with http://www.zjks.com/ the crawl always fails at the statement below.
// NOTE(review): if newswebrequest is an HttpWebRequest, GetResponse() throws a
// WebException for protocol errors and timeouts — catch it and inspect its Status and
// Response to see why this particular site fails (confirm the request type first).
// The returned WebResponse is IDisposable and should be disposed after use.
107 System.Net.WebResponse newswebresponse = newswebrequest.GetResponse();// crawling always aborts here
跪求思路!
- 10 回答
- 0 关注
- 490 浏览
添加回答
举报
0/150
提交
取消