analysis_log.php "01", "Feb"="02", "Mar"="03", "Apr"="04", "May"="05", "Jun"="06", "Jul"="07", "Aug"="08", "Sept"="09", "Oct"="10", "Nov"="11", "Dec"="12", );// public $logPath = 'E:/Spiders/Log_download/'; public $logPath = '/www/wwwlogs/b
"01", "Feb"=>"02", "Mar"=>"03", "Apr"=>"04", "May"=>"05", "Jun"=>"06", "Jul"=>"07", "Aug"=>"08", "Sept"=>"09", "Oct"=>"10", "Nov"=>"11", "Dec"=>"12", ); // public $logPath = 'E:/Spiders/Log_download/'; public $logPath = '/www/wwwlogs/backup/'; public $rootPath = '/wwwroot/wwwroot/'; public function file_name() { foreach ($this->domain_list as $domain) { if(is_dir($this->logPath.$domain)) // 如果域名文件存在 { $file_list = glob($this->logPath.$domain.'/'.date("Y-m-d").'.log'); foreach ($file_list as $file) //www/wwwlogs/backup/028webs.com/*.log { $this->analysis_log($file); // 将log文件完整路径传入计算函数 } } } } public function analysis_log($file) { //单文件解析 $logdata = array(); $log = new SplFileObject($file); foreach ($log as $line) { //日志计算 if($line) { $record_list = explode(' ', $line); if ($record_list[3]) { $date = $record_list[3]; preg_match('/^(20\d\d)/i', explode('/', $date)[2], $year); $month = $this->mon_dict[explode('/', $date)[1]]; $day = str_replace('[', '', explode('/', $date)[0]); $day_format = $year[0] . '-' . $month . '-' . $day; $url = $record_list[6]; $status = $record_list[8]; } if (!array_key_exists($day_format, $logdata)){ // 如果day不在列表存在 $logdata[$day_format] = ['Baidu' => 0, '360' => 0, 'sm' => 0, 'Sogou' => 0,'Crawl_url'=>array()]; } else { if (preg_match('/(Baiduspider)/i', $line)) # 查找百度Spider { $logdata[$day_format]['Baidu']++; $logdata[$day_format]['Crawl_url'][] = $url.':'.$status; } elseif (preg_match('/(HaosouSpider|360Spider)/i', $line)) { $logdata[$day_format]['360']++; $logdata[$day_format]['Crawl_url'][] = $url.':'.$status; } elseif (preg_match('/(YisouSpider)/i', $line)) { $logdata[$day_format]['sm']++; $logdata[$day_format]['Crawl_url'][] = $url.':'.$status; } elseif (preg_match('/(Sogou web spider)/i', $line)) { $logdata[$day_format]['Sogou']++; $logdata[$day_format]['Crawl_url'][] = $url.' : '.$status; } else continue; } } } foreach ($logdata as $day => $message) { $today_analysis = ''.$day . ' : '; foreach ($message as $k => $v) { if($k == 'Crawl_url') { $url_list = array(); foreach ($v as $u) { $url_list[] = $u; } } else $today_analysis .= $k . ' : ' . $v . ' ;'; } //echo $today_analysis.PHP_EOL; $today_analysis .= '-----------------'; foreach ($url_list as $url_status) { $today_analysis .= ''.$url_status.'
'; } /* analysis logs format 2017-07-31 : Baidu : 8 ;360 : 0 ;sm : 0 ;Sogou : 0 ;----------------- /:200 /:499 /silian.txt:200 /sitemap.xml:304 /m/:301 /m/:200 */ $domain = explode('/', $file)[4]; $is_m = 0; if(strstr($domain,'m.')) { $domain = str_replace('m.', '', $domain); $is_m = 1; } $domain_logs = $this->rootPath . $domain . '/logs/'; if(!file_exists($domain_logs)) mkdir($domain_logs,0777); if($is_m) $put_file = $domain_logs . date("Y-m-d") . '_m_analysis.html'; else $put_file = $domain_logs . date("Y-m-d") . '_analysis.html'; file_put_contents($put_file, $today_analysis.PHP_EOL,FILE_APPEND); } } } $analysis = new analysis_logs(); $analysis->file_name();