?php/** * * @authors HG (hg0728@qq.com) * @date 2015-05-22 17:00:48 * @version 1.0 */header("Content-type:text/html;charset=utf-8");function getCurl($url) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RE
<?php
/**
*
* @authors HG (hg0728@qq.com)
* @date 2015-05-22 17:00:48
* @version 1.0
*/
// Announce the response as UTF-8 encoded HTML; must run before any output is sent.
header('Content-type:text/html;charset=utf-8');
/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Address to request.
 * @return string|false Response body on success; false when the cURL handle
 *                      cannot be created or the transfer fails / times out.
 */
function getCurl($url) {
    $ch = curl_init();
    if ($ch === false) {
        // No handle: report failure the same way a failed transfer does.
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        // Return the body from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
        // NOTE(review): TLS peer/host verification is switched off, which allows
        // man-in-the-middle tampering on https URLs. Kept as-is to preserve
        // behaviour for existing callers; enable verification if possible.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        // Bound the wait so one unresponsive server cannot stall the caller forever.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 30,
    ]);

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
/**
 * Extract post links from listing-page HTML, echo them, and log them to disk.
 *
 * Each anchor of the form <h3><a class="titlelnk" href="..." target="_blank">...</a></h3>
 * found in $str has its URL echoed (with no separator) and a "URL title" line
 * appended to blogs.txt in the current working directory.
 *
 * @param string $str HTML markup to scan.
 * @return void
 */
function preg_list($str) {
    $regex = '/<h3><a class="titlelnk" href="(.*?)" target="_blank">(.*?)<\\/a><\\/h3>/';
    $matchCount = preg_match_all($regex, $str, $matches);

    // preg_match_all() yields 0 for "no matches" and false on error; in both
    // cases there is nothing to echo or write, and blogs.txt is left untouched.
    if (!$matchCount) {
        return;
    }

    $lines = '';
    for ($i = 0; $i < $matchCount; $i++) {
        echo $matches[1][$i];
        // A real newline terminates each record ("\\n" would write a literal
        // backslash followed by 'n', gluing every entry onto one line).
        $lines .= $matches[1][$i] . ' ' . $matches[2][$i] . "\n";
    }

    // One append per page instead of one per match.
    file_put_contents('blogs.txt', $lines, FILE_APPEND);
}
// Paginated crawl: index 0 is the site landing page, indexes 1..200 are the
// numbered listing pages under /sitehome/p/.
for ($i = 0; $i <= 200; $i++) {
    $url = ($i === 0)
        ? 'http://www.cnblogs.com/'
        : 'http://www.cnblogs.com/sitehome/p/' . $i;

    // Download the page, then extract and record its post links.
    $str = getCurl($url);
    preg_list($str);
}
