?php/** * * @authors HG (hg0728@qq.com) * @date 2015-05-22 17:00:48 * @version 1.0 */header("Content-type:text/html;charset=utf-8");function getCurl($url) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RE
<?php
/**
 * Crawls the cnblogs.com post listing and appends each post's URL and title
 * to blogs.txt in the current working directory.
 *
 * @authors HG (hg0728@qq.com)
 * @date    2015-05-22 17:00:48
 * @version 1.0
 */

header("Content-type:text/html;charset=utf-8");

/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Address to request.
 *
 * @return string|false Response body, or false when the cURL handle could not
 *                      be created or the transfer failed.
 */
function getCurl($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // NOTE(review): certificate and host verification are switched off, so any
    // HTTPS response is accepted unverified. Kept as in the original script;
    // enable both checks if the crawled data must be trusted.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}

/**
 * Extract the post links from a downloaded listing page.
 *
 * Every `<h3><a class="titlelnk" ...>` entry found in the markup is echoed
 * (URL only) and appended to blogs.txt as "<url> <title>", one entry per line.
 *
 * @param string|false $str Page markup as returned by getCurl(); false or an
 *                          empty string (failed download) is ignored.
 *
 * @return void
 */
function preg_list($str)
{
    // A failed download yields false; there is nothing to parse in that case.
    if (!is_string($str) || $str === '') {
        return;
    }

    $regex = '/<h3><a class="titlelnk" href="(.*?)" target="_blank">(.*?)<\\/a><\\/h3>/';
    $matchCount = preg_match_all($regex, $str, $matches);
    // preg_match_all() gives false on a PCRE error and 0 when nothing matched.
    if (!$matchCount) {
        return;
    }

    $lines = '';
    for ($i = 0; $i < $matchCount; $i++) {
        echo $matches[1][$i];
        // "\n" (not "\\n") so each record really ends with a line break.
        $lines .= $matches[1][$i] . ' ' . $matches[2][$i] . "\n";
    }

    // One append per page instead of reopening the file for every match.
    file_put_contents('blogs.txt', $lines, FILE_APPEND);
}

// Crawl page by page: iteration 0 is the site home page, iterations 1..200
// are the paginated listing under /sitehome/p/<n>.
for ($page = 0; $page < 201; $page++) {
    if ($page === 0) {
        $url = 'http://www.cnblogs.com/';
    } else {
        $url = 'http://www.cnblogs.com/sitehome/p/' . $page;
    }

    preg_list(getCurl($url));
}