gistfile1.txt '经营策略', 54='棋牌天地', 102238='儿童游戏', 101587='角色扮演', 19='休闲益智', 20='动作冒险', 100451='网络游戏', 51='体育竞速', 52='飞行射击'];public function __construct(){$link = mysql_co
'经营策略', 54=>'棋牌天地', 102238=>'儿童游戏', 101587=>'角色扮演', 19=>'休闲益智', 20=>'动作冒险', 100451=>'网络游戏', 51=>'体育竞速', 52=>'飞行射击'];

    /**
     * Remote (360 market) category id => local market_product.cateid.
     *
     * @var array
     */
    private static $localCateIds = [
        19     => 11, // casual & puzzle
        20     => 12, // action & adventure
        51     => 34, // sports & racing
        52     => 12, // flight & shooting
        53     => 32, // business & strategy
        54     => 13, // board & card games
        102238 => 11, // children's games
        101587 => 33, // role playing
        100451 => 14, // online games
    ];

    /**
     * Open database connection shared by all queries of this instance.
     *
     * @var mysqli
     */
    private $dbLink;

    /**
     * Connects to the `market` database; prints a message and terminates the
     * script when the connection cannot be established.
     *
     * mysqli replaces the mysql_* API, which no longer exists as of PHP 7.
     */
    public function __construct()
    {
        // Report failures through return values on every PHP version
        // (PHP 8.1+ would otherwise throw mysqli_sql_exception by default).
        mysqli_report(MYSQLI_REPORT_OFF);

        // NOTE(review): credentials are hard-coded in source; consider moving
        // them to configuration that is not committed.
        $link = mysqli_connect('localhost', 'market', 'AW_d4au_jA3hH21212', 'market');
        if (!$link) {
            echo 'connect failed......';
            die;
        }
        $this->dbLink = $link;
    }

    /**
     * Builds the list pages to crawl: for every category id and every entry
     * of $this->url, pages 1 through 50.
     *
     * @return array list of absolute page URLs (each one embeds the category id)
     */
    public function getPages()
    {
        $pages = [];
        foreach ($this->cateId as $key => $value) {
            foreach ($this->url as $url) {
                for ($i = 1; $i <= 50; $i++) {
                    // The category id is part of the URL; getSids() parses it back out.
                    $pages[] = self::BASEURL . '/' . $key . '/' . $url . '/?page=' . $i;
                }
            }
        }

        return $pages;
    }

    /**
     * Collects product ids (sid) from every list page.
     *
     * @return array map of sid => remote category id
     */
    public function getSids()
    {
        set_time_limit(0);

        $sids = [];
        foreach ($this->getPages() as $page) {
            // Category id is the text between 'cid/' and '/order' in the page URL.
            // NOTE(review): presumably BASEURL ends in 'cid' and the $url entries
            // start with 'order' - both are defined outside this view; confirm.
            $posStart = strpos($page, 'cid/') + 4;
            $posEnd = strpos($page, '/order');
            $cid = substr($page, $posStart, $posEnd - $posStart);

            $htmlData = file_get_contents($page);
            if ($htmlData === false) {
                // Page could not be fetched; skip it instead of parsing `false`.
                continue;
            }

            // NOTE(review): pattern reproduced exactly as found. Its single group
            // captures whatever precedes "</a></h3>", which does not look like an
            // id - part of the pattern appears to be missing; restore it.
            preg_match_all('/(.*?)<\/a><\/h3>/ism', $htmlData, $sidArr);
            if (empty($sidArr[1])) {
                continue;
            }
            foreach ($sidArr[1] as $sid) {
                // sid is the key, category id the value (duplicates collapse).
                $sids["$sid"] = $cid;
            }
        }

        return $sids;
    }

    /**
     * Fetches the detail page of every product returned by getSids(), extracts
     * its fields and inserts it into market_product unless a row with the same
     * package_apk already exists.
     *
     * Products whose page cannot be fetched, whose package name cannot be
     * derived, or whose category has no local mapping are skipped.
     *
     * @return void
     */
    public function getData()
    {
        foreach ($this->getSids() as $sid => $cid) {
            $htmlData = file_get_contents('http://zhushou.360.cn/detail/index/soft_id/' . $sid);
            if ($htmlData === false) {
                continue;
            }

            // Id of the product in the 360 market.
            $b_id = (string) $sid;

            // Name and icon.
            // NOTE(review): pattern reproduced exactly as found (including the
            // line break). It has no capture groups although groups 1 and 2 are
            // read below, so part of it appears to be missing; restore it.
            preg_match_all('/
<\/dt>/ism', $htmlData, $iconArr);
            $name = $this->matchAt($iconArr, 2, 0);
            $icon = $this->saveIconToLocal($this->matchAt($iconArr, 1, 0));

            // Version.
            preg_match_all('/版本：<\/strong>(.*?)<\/td>/ism', $htmlData, $versionArr);
            $version = strip_tags($this->matchAt($versionArr, 1, 0));

            // Download count (first full match) and size (second full match).
            // NOTE(review): pattern reproduced exactly as found; it looks truncated.
            preg_match_all('/(.*?)<\/span>/ism', $htmlData, $downArr);
            $size = strip_tags($this->matchAt($downArr, 0, 1));
            $count = $this->getCount(strip_tags($this->matchAt($downArr, 0, 0)));

            // Download link: everything from the last 'http' in the captured text.
            // NOTE(review): pattern reproduced exactly as found; it has no capture
            // group although group 1 is read, so part of it appears to be missing.
            preg_match_all('/ .*?<\/a>/ism', $htmlData, $urlArr);
            $urlStr = $this->matchAt($urlArr, 1, 0);
            $url = (string) substr($urlStr, (int) strrpos($urlStr, 'http'));

            // Package name: from the last 'com' up to the last '_' of the link.
            $posStart = (int) strrpos($url, 'com');
            $posEnd = (int) strrpos($url, '_');
            $package = (string) substr($url, $posStart, $posEnd - $posStart);
            if ($package === '') {
                // Without a package name the duplicate check is meaningless.
                continue;
            }

            // Introduction text.
            // NOTE(review): pattern reproduced exactly as found; it looks truncated.
            preg_match_all('/ .*?<\/div>/is', $htmlData, $introArr);
            $introStr = $this->matchAt($introArr, 0, 0);
            $pos = strpos($introStr, 'base-info');
            if ($pos !== false) {
                // Cut 12 bytes before 'base-info' (presumably the length of the
                // opening `<div class="` - confirm against the page markup).
                $introStr = (string) substr($introStr, 0, $pos - 12);
            }
            $intro = trim(strip_tags(html_entity_decode($introStr)));

            // Screenshots.
            $screen = $this->getScreen($htmlData);

            // Translate the remote category; never reuse a value from a previous
            // iteration when the category is unknown.
            if (!isset(self::$localCateIds[$cid])) {
                continue;
            }
            $cateid = self::$localCateIds[$cid];

            $uptime = date('Y-m-d H:i:s');

            if ($this->productExists($package)) {
                continue;
            }

            $this->insertProduct(
                $name, $icon, $size, $url, $count, $screen, $version,
                $intro, $package, $b_id, $cateid, $uptime, 4
            );
        }
    }

    /**
     * Returns $matches[$group][$index] from a preg_match_all() result, or an
     * empty string when that entry does not exist.
     */
    private function matchAt($matches, $group, $index)
    {
        return isset($matches[$group][$index]) ? $matches[$group][$index] : '';
    }

    /**
     * Tells whether market_product already holds a row for the given package.
     *
     * A failed lookup is reported as "exists" so that nothing is inserted
     * without a working duplicate check.
     */
    private function productExists($package)
    {
        $stmt = mysqli_prepare($this->dbLink, 'SELECT id FROM market_product WHERE package_apk = ?');
        if (!$stmt) {
            error_log('market_product lookup failed: ' . mysqli_error($this->dbLink));
            return true;
        }

        mysqli_stmt_bind_param($stmt, 's', $package);
        if (!mysqli_stmt_execute($stmt)) {
            error_log('market_product lookup failed: ' . mysqli_stmt_error($stmt));
            mysqli_stmt_close($stmt);
            return true;
        }
        mysqli_stmt_store_result($stmt);
        $exists = mysqli_stmt_num_rows($stmt) > 0;
        mysqli_stmt_close($stmt);

        return $exists;
    }

    /**
     * Inserts one product row using a prepared statement, so scraped text can
     * never alter the SQL. Failures are written to the error log.
     */
    private function insertProduct($name, $icon, $size, $url, $count, $screen, $version, $intro, $package, $b_id, $cateid, $uptime, $score)
    {
        $stmt = mysqli_prepare(
            $this->dbLink,
            'INSERT INTO market_product(name, icon, size, downurl, downnum, screenshots, version, intro, package_apk, b_id, cateid, uptime, score) '
            . 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'
        );
        if (!$stmt) {
            error_log('market_product insert failed: ' . mysqli_error($this->dbLink));
            return;
        }

        // downnum and b_id are bound as strings and converted by MySQL; this
        // avoids integer overflow in PHP for very large download counts.
        mysqli_stmt_bind_param(
            $stmt,
            'ssssssssssisi',
            $name, $icon, $size, $url, $count, $screen, $version,
            $intro, $package, $b_id, $cateid, $uptime, $score
        );
        if (!mysqli_stmt_execute($stmt)) {
            error_log('market_product insert failed: ' . mysqli_stmt_error($stmt));
        }
        mysqli_stmt_close($stmt);
    }

    /**
     * Converts the download-count text of a detail page into a number.
     *
     * Drops the first 9 and the last 3 bytes of $count, then expands a
     * trailing '万' (x10,000) or '亿' (x100,000,000). Offsets are byte based
     * and sized for 3-byte UTF-8 characters.
     *
     * @param string $count raw text
     * @return int|float|string the expanded number, or the remaining text
     *                          unchanged when it carries no unit
     */
    public function getCount($count)
    {
        $numStr = substr($count, 9, -3);
        $unit = substr($numStr, -3);

        if ($unit === '万') {
            return substr($numStr, 0, -3) * 10000;
        }
        if ($unit === '亿') {
            return substr($numStr, 0, -3) * 100000000;
        }

        return $numStr;
    }

    /**
     * Extracts the screenshot list of a detail page.
     *
     * @param string $htmlData detail page HTML
     * @return string comma separated screenshot values, '' when none are found
     */
    public function getScreen($htmlData)
    {
        // NOTE(review): the three patterns below are reproduced exactly as
        // found; they look truncated (the last one is empty and has no group 1
        // although group 1 is read). Restore them before relying on the output.
        preg_match_all('/ (.*?)<\/p>/ism', $htmlData, $screenArr);
        if (empty($screenArr[1])) {
            // preg_match_all() always fills the match array, so the fallback has
            // to test the captured group rather than count() of the whole array.
            preg_match_all('/(.*?)<\/p>/ism', $htmlData, $screenArr);
        }

        $screenStr = isset($screenArr[1]) ? implode(',', $screenArr[1]) : '';
        preg_match_all('//ism', $screenStr, $screen);

        return isset($screen[1]) ? implode(',', $screen[1]) : '';
    }

    /**
     * Stores the icon on the local server.
     *
     * Not implemented yet: the remote URL is handed back unchanged so that the
     * caller does not overwrite the icon with null.
     *
     * @param string $icon remote icon URL
     * @return string value to store in market_product.icon
     */
    public function saveIconToLocal($icon)
    {
        // todo code
        return $icon;
    }
}

$spider = new Spider();
$spider->getData();