analysis_log.php "01", "Feb"="02", "Mar"="03", "Apr"="04", "May"="05", "Jun"="06", "Jul"="07", "Aug"="08", "Sept"="09", "Oct"="10", "Nov"="11", "Dec"="12", );// public $logPath = 'E:/Spiders/Log_download/'; public $logPath = '/www/wwwlogs/b
"01", "Feb"=>"02", "Mar"=>"03", "Apr"=>"04", "May"=>"05",
"Jun"=>"06", "Jul"=>"07", "Aug"=>"08", "Sept"=>"09", "Oct"=>"10",
"Nov"=>"11", "Dec"=>"12",
);
// Alternative log root, left commented out:
// public $logPath = 'E:/Spiders/Log_download/';
// Log root scanned by file_name(); it appends "<domain>/<Y-m-d>.log" to this prefix.
public $logPath = '/www/wwwlogs/backup/';
// Prefix analysis_log() uses to build "<rootPath><domain>/logs/", where the report files are written.
public $rootPath = '/wwwroot/wwwroot/';
/**
 * Feed today's log file of every configured domain to analysis_log().
 *
 * For each entry of $this->domain_list that has a directory under
 * $this->logPath, the file "<logPath><domain>/<Y-m-d>.log" is looked up
 * with glob() and, when found, handed over with its full path.
 *
 * @return void
 */
public function file_name()
{
    // Resolve the date once so every domain is matched against the same day,
    // even if the run happens to cross midnight.
    $today = date('Y-m-d');

    foreach ($this->domain_list as $domain) {
        $domain_dir = $this->logPath . $domain;
        if (!is_dir($domain_dir)) {
            // No log directory for this domain
            continue;
        }

        // e.g. /www/wwwlogs/backup/028webs.com/2017-07-31.log
        $file_list = glob($domain_dir . '/' . $today . '.log');
        if (!$file_list) {
            // glob() yields false on error and an empty array when nothing
            // matches; neither can be iterated usefully.
            continue;
        }

        foreach ($file_list as $file) {
            // Pass the full path of the log file to the analyser
            $this->analysis_log($file);
        }
    }
}
/**
 * Parse one access-log file and append a per-day spider summary to the
 * report file of the site the log belongs to.
 *
 * Every line is split on single spaces: field 3 is read as the
 * "[dd/Mon/yyyy:..." timestamp, field 6 as the requested URL and field 8 as
 * the status code. Lines that are too short or whose timestamp cannot be
 * parsed are skipped. A line is attributed to the first spider whose pattern
 * matches anywhere in the line.
 *
 * For each day found, one record is appended to
 * "<rootPath><domain>/logs/<today>_analysis.html" (or "_m_analysis.html" when
 * the log directory name starts with "m."), shaped as:
 *
 *   <day> : Baidu : n ;360 : n ;sm : n ;Sogou : n ;-----------------<url>:<status>
 *   <url>:<status>
 *   ...
 *
 * @param string $file Full path of the log file; the name of its parent
 *                     directory is taken as the domain.
 * @return void
 */
public function analysis_log($file)
{
    // Report label => pattern, tried in this order; the first match wins.
    $spiders = array(
        'Baidu' => '/(Baiduspider)/i',
        '360'   => '/(HaosouSpider|360Spider)/i',
        'sm'    => '/(YisouSpider)/i',
        'Sogou' => '/(Sogou web spider)/i',
    );

    // The month map in this file spells September "Sept", while access-log
    // timestamps abbreviate it as "Sep"; accept both without touching the map.
    $months = $this->mon_dict + array('Sep' => '09');

    $logdata = array();
    $log = new SplFileObject($file);
    foreach ($log as $line) {
        // Drop the trailing newline so blank lines and the last field are clean
        $record_list = explode(' ', trim($line));
        if (count($record_list) < 9) {
            // Blank or truncated line: fields 3, 6 and 8 are not all present
            continue;
        }

        // Field 3 looks like "[31/Jul/2017:10:00:00"
        if (!preg_match('/^\[(\d{1,2})\/([A-Za-z]+)\/(\d{4})/', $record_list[3], $stamp)
            || !isset($months[$stamp[2]])
        ) {
            continue;
        }
        $day_format = $stamp[3] . '-' . $months[$stamp[2]] . '-' . $stamp[1];

        if (!isset($logdata[$day_format])) {
            // First line seen for this day: start its counters
            $logdata[$day_format] = array(
                'Baidu' => 0,
                '360' => 0,
                'sm' => 0,
                'Sogou' => 0,
                'Crawl_url' => array(),
            );
        }

        // Count the line even when it is the one that created the day entry;
        // otherwise the first spider hit of every day would be lost.
        foreach ($spiders as $name => $pattern) {
            if (preg_match($pattern, $line)) {
                $logdata[$day_format][$name]++;
                $logdata[$day_format]['Crawl_url'][] = $record_list[6] . ':' . $record_list[8];
                break;
            }
        }
    }

    if (!$logdata) {
        // Nothing parsed: do not create directories or files
        return;
    }

    // The domain is the directory holding the log file, whatever the depth
    // of the configured log root.
    $domain = basename(dirname($file));

    // Only a leading "m." marks the mobile host; a plain substring test would
    // also hit names such as "example.com.cn".
    $is_m = strncmp($domain, 'm.', 2) === 0;
    if ($is_m) {
        $domain = substr($domain, 2);
    }

    $domain_logs = $this->rootPath . $domain . '/logs/';
    if (!file_exists($domain_logs)) {
        mkdir($domain_logs, 0777);
    }
    $put_file = $domain_logs . date("Y-m-d") . ($is_m ? '_m_analysis.html' : '_analysis.html');

    foreach ($logdata as $day => $message) {
        $today_analysis = $day . ' : ';
        foreach ($spiders as $name => $pattern) {
            $today_analysis .= $name . ' : ' . $message[$name] . ' ;';
        }
        $today_analysis .= '-----------------';
        foreach ($message['Crawl_url'] as $url_status) {
            // Request URLs come from clients and end up in an .html file,
            // so escape them before writing.
            $today_analysis .= htmlspecialchars($url_status, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "\n";
        }
        file_put_contents($put_file, $today_analysis . PHP_EOL, FILE_APPEND);
    }
}
}
// Script entry point: build the analyser and process today's log of every configured domain.
$analysis = new analysis_logs();
$analysis->file_name();
