This internal server error usually appears once $i reaches somewhere between 35 and 45.
class Pacong extends Base
{
    public function test()
    {
        header("content-type:text/html;charset=utf-8");
        set_time_limit(0);             // no execution time limit
        ini_set('memory_limit', '-1'); // no memory limit
        $array = [
            0 => 'https://haikou.anjuke.com/ask/fl-qita/p',
            1 => 'https://haikou.anjuke.com/ask/fl-daikuan/p',
            2 => 'https://haikou.anjuke.com/ask/fl-maifang/p',
            3 => 'https://haikou.anjuke.com/ask/fl-maifanga/p',
            4 => 'https://haikou.anjuke.com/ask/fl-zufang/p',
            5 => 'https://haikou.anjuke.com/ask/fl-jiaoyiguohu/p',
        ];
        foreach ($array as $k => $v) {
            $header = array();
            for ($i = 1; $i < 100; $i++) {
                // sleep(5);
                echo $i;
                $curlobj = curl_init();
                // target URL: category prefix + page number
                curl_setopt($curlobj, CURLOPT_URL, $v . $i . "/");
                curl_setopt($curlobj, CURLOPT_TIMEOUT, 0);
                curl_setopt($curlobj, CURLOPT_CONNECTTIMEOUT, 0);
                // return the response instead of printing it directly
                curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);
                // include the response headers in the returned output
                curl_setopt($curlobj, CURLOPT_HEADER, 1);
                curl_setopt($curlobj, CURLOPT_HTTPHEADER, $header);
                curl_setopt($curlobj, CURLOPT_COOKIE, 'aQQ_ajkguid=65E1E78E-6422-B2AF-B73F-000C2FA17625; ctid=49; 58tj_uuid=dfc8d57c-c982-4cb0-9693-9e3b688e6d97; als=0; _ga=GA1.2.1683086249.1524535452; _gid=GA1.2.785162605.1524535452; isp=true; lps=http%3A%2F%2Fhaikou.anjuke.com%2Fask%2Ffl-qita%2Fp30%7C; twe=2; sessid=39A3A71B-0A07-8C1E-1B9D-9409C5F93F8B; init_refer=; new_uv=5; new_session=0; __xsptplusUT_8=1; __xsptplus8=8.5.1524622380.1524623253.8%234%7C%7C%7C%7C%7C%23%23yn1dRjCaHplJ6-hXacmy8mfcE82lTJHz%23;');
                curl_setopt($curlobj, CURLOPT_SSL_VERIFYPEER, false); // skip certificate verification
                curl_setopt($curlobj, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36");
                $output = curl_exec($curlobj); // perform the request
                $info = curl_getinfo($curlobj);
                print_r($info);
                echo "<pre>"; print_r(curl_error($curlobj)); echo "</pre>";
                echo "<pre>"; print_r(curl_getinfo($curlobj)); echo "</pre>";
                echo "<pre>"; print_r($header); echo "</pre>";
                curl_close($curlobj); // close the cURL handle
            }
        }
    }
}
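One detail worth flagging in the code above: $header stays an empty array for the whole run, so CURLOPT_HTTPHEADER sends no extra request headers. A minimal sketch of populating it with browser-like headers before the curl_setopt call; the specific header values here are illustrative, not requirements the site is known to check:

    $header = array(
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: zh-CN,zh;q=0.9',
        'Referer: https://haikou.anjuke.com/ask/',
    );
    curl_setopt($curlobj, CURLOPT_HTTPHEADER, $header);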
2 Answers
吃鸡游戏
The server has probably applied anti-crawler detection: when it sees overly frequent requests combined with certain other request characteristics, it decides the client is a crawler and returns a 500 error page. You can confirm this by checking the HTTP status code with curl_getinfo:
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
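Building on that advice, here is a minimal sketch of how the question's loop could react to the status code instead of blindly continuing: pause and retry once when a 5xx comes back, and keep a small delay between requests. The fetchPage() helper, the 3-second delay, the 30-second back-off, and the single-retry policy are all assumptions for illustration, not known limits of anjuke.com:

    function fetchPage($url) {
        // Perform one GET request and return [body, httpCode].
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; WOW64)');
        $body = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
        return array($body, $httpCode);
    }

    for ($i = 1; $i < 100; $i++) {
        $url = 'https://haikou.anjuke.com/ask/fl-qita/p' . $i . '/';
        list($body, $httpCode) = fetchPage($url);
        if ($httpCode >= 500) {
            sleep(30);                           // back off, then retry once
            list($body, $httpCode) = fetchPage($url);
            if ($httpCode >= 500) {
                break;                           // still blocked: stop this category
            }
        }
        // ... parse $body here ...
        sleep(3);                                // delay between page fetches
    }

If the 500s persist even with long pauses, the block is probably keyed to the cookie or IP rather than just the request rate.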