特色栏目： python 批处理 net编程 Javascript Php Asp Css Html5 Android seo centos

androidbenchmark和iphonebenchmark这两页面中设备信息爬虫

来源：互联网　收集：自由互联　发布时间：2022-06-18

摘要：androidbenchmark爬虫（作者：ewang；日期：2016-7-11；语言：Python 2.7）。功能：获取页面中的Android PassMark Rating信息并保存到文件中。完整代码如下。

#coding=utf-8
#---------------------------------------
# 程序：androidbenchmark爬虫
# 作者：ewang
# 日期：2016-7-11
# 语言：Python 2.7
# 功能：获取页面中的Android PassMark Rating信息保存到文件中。
#---------------------------------------

import string
import urllib2
import re
import os

class androidbenchmark_Spider:
    """Collect device names from one benchmark chart page.

    The page at ``url`` is downloaded, the target of every
    ``phone.php?phone=<name>`` link is extracted, and the names are written
    one per line to ``<page title>.csv`` in the current working directory.

    Written for Python 2.7 (``urllib2``, ``raw_input``).
    """

    def __init__(self, url):
        """Store the chart page URL and start with an empty result list."""
        # URL of the chart page to scrape.
        self.androidbenchmarkUrl = url
        # Device names found so far, each terminated by a newline.
        self.androidbenchmark = []
        print(u'爬虫，爬爬...')

    def AndroidBenchMark(self):
        """Download the page once, report its title and save the names."""
        Page = self._fetch()
        title = self.find_title(Page)
        print(u'网页名称:' + title)
        # Hand the downloaded page on so it is not fetched a second time.
        self.save_infor(title, Page)

    def _fetch(self):
        """Return the raw body of the chart page."""
        response = urllib2.urlopen(self.androidbenchmarkUrl)
        # Python 2 responses are not context managers, hence try/finally.
        try:
            return response.read()
        finally:
            response.close()

    def find_title(self, page):
        """Return the text of the page's <title> element.

        The result is always text (never bytes) with surrounding whitespace
        removed. When the element is missing or empty a report is printed
        and the placeholder title is returned.
        """
        myTitle = re.search(r'<title>(.*?)</title>', page, re.S)
        # group(1) is the first (and only) capture group.
        title = myTitle.group(1) if myTitle else u''
        if isinstance(title, bytes):
            # Mixing undecoded non-ASCII bytes with the unicode messages
            # below would raise UnicodeDecodeError on Python 2.
            title = title.decode('utf-8', 'replace')
        title = title.strip()
        if not title:
            print(u'爬虫报告：无法加载网页标题...')
            return u'暂无标题'
        return title

    @staticmethod
    def _csv_name(title):
        """Turn a page title into a safe ``<title>.csv`` file name.

        The title comes from remote HTML, so path separators, characters
        that are invalid in Windows file names and control characters are
        replaced by ``_``; leading/trailing dots and spaces are dropped.
        """
        safe = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', title).strip(' .')
        return (safe or u'暂无标题') + '.csv'

    def save_infor(self, title, page=None):
        """Extract the device names and write them to ``<title>.csv``.

        ``page`` is the already-downloaded page source; when omitted the
        page is fetched from the stored URL. After writing, the method
        prints a report and waits for the user to press Enter.
        """
        self.get_infor(page)

        filename = self._csv_name(title)
        # 'with' guarantees the file is closed even if writing fails.
        with open(filename, 'w+') as f:
            f.writelines(self.androidbenchmark)

        # os.getcwd() returns bytes on Python 2; prefer the unicode variant
        # so a non-ASCII working directory cannot break the message.
        cwd = getattr(os, 'getcwdu', os.getcwd)()
        print(u'爬虫报告：文件' + filename + u'已经下载:' + cwd)
        print(u'按任意键退出...')
        raw_input()

    def get_infor(self, page=None):
        """Append the device names found in ``page`` to the result list.

        When ``page`` is None the page is downloaded from the stored URL.
        """
        if page is None:
            page = self._fetch()
        self.deal_Android_Device(page)

    def deal_Android_Device(self, page):
        """Append every ``phone.php?phone=<name>`` link target in ``page``.

        Names are stored exactly as they appear in the href attribute, each
        followed by a newline.
        """
        names = re.findall(r'<a href="phone\.php\?phone=(.*?)">', page, re.S)
        self.androidbenchmark.extend(name + "\n" for name in names)

#------------Program entry point----------------
# Start-up banner; the text is echoed to the console verbatim.
print(u"""#---------------------------------------
# 程序：程序：androidbenchmark爬虫
# 作者：ewang
# 日期：2016-7-7
# 语言：Python 2.7
# 功能：获取页面中的Android PassMark Rating信息保存到文件中。
#--------------------------------------------------
""")

# An interactive variant that prompts for a single chart name is left
# disabled here; this script crawls every chart page of both sites instead.
site_roots = ('http://www.androidbenchmark.net/',
              'http://www.iphonebenchmark.net/')
chart_files = ('passmark_chart.html', 'memmark_chart.html',
               'cpumark_chart.html', 'diskmark_chart.html',
               'g2dmark_chart.html', 'g3dmark_chart.html')

# Sites form the outer loop and charts the inner loop, so all charts of
# one site are processed before moving on to the next site.
for bdurl in [root + chart for root in site_roots for chart in chart_files]:
    Android_Device_Name = androidbenchmark_Spider(bdurl)
    Android_Device_Name.AndroidBenchMark()
#coding=utf-8
#---------------------------------------
# 程序：androidbenchmark爬虫
# 作者：ewang
# 日期：2016-7-11
# 语言：Python 2.7
# 功能：获取页面中的Android PassMark Rating信息保存到文件中。
#---------------------------------------

import string
import urllib2
import re
import os

class androidbenchmark_Spider:
    """Collect device names from one benchmark chart page.

    The page at ``url`` is downloaded, the target of every
    ``phone.php?phone=<name>`` link is extracted, and the names are written
    one per line to ``<page title>.csv`` in the current working directory.

    Written for Python 2.7 (``urllib2``, ``raw_input``).
    """

    def __init__(self, url):
        """Store the chart page URL and start with an empty result list."""
        # URL of the chart page to scrape.
        self.androidbenchmarkUrl = url
        # Device names found so far, each terminated by a newline.
        self.androidbenchmark = []
        print(u'爬虫，爬爬...')

    def AndroidBenchMark(self):
        """Download the page once, report its title and save the names."""
        Page = self._fetch()
        title = self.find_title(Page)
        print(u'网页名称:' + title)
        # Hand the downloaded page on so it is not fetched a second time.
        self.save_infor(title, Page)

    def _fetch(self):
        """Return the raw body of the chart page."""
        response = urllib2.urlopen(self.androidbenchmarkUrl)
        # Python 2 responses are not context managers, hence try/finally.
        try:
            return response.read()
        finally:
            response.close()

    def find_title(self, page):
        """Return the text of the page's <title> element.

        The result is always text (never bytes) with surrounding whitespace
        removed. When the element is missing or empty a report is printed
        and the placeholder title is returned.
        """
        myTitle = re.search(r'<title>(.*?)</title>', page, re.S)
        # group(1) is the first (and only) capture group.
        title = myTitle.group(1) if myTitle else u''
        if isinstance(title, bytes):
            # Mixing undecoded non-ASCII bytes with the unicode messages
            # below would raise UnicodeDecodeError on Python 2.
            title = title.decode('utf-8', 'replace')
        title = title.strip()
        if not title:
            print(u'爬虫报告：无法加载网页标题...')
            return u'暂无标题'
        return title

    @staticmethod
    def _csv_name(title):
        """Turn a page title into a safe ``<title>.csv`` file name.

        The title comes from remote HTML, so path separators, characters
        that are invalid in Windows file names and control characters are
        replaced by ``_``; leading/trailing dots and spaces are dropped.
        """
        safe = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', title).strip(' .')
        return (safe or u'暂无标题') + '.csv'

    def save_infor(self, title, page=None):
        """Extract the device names and write them to ``<title>.csv``.

        ``page`` is the already-downloaded page source; when omitted the
        page is fetched from the stored URL. After writing, the method
        prints a report and waits for the user to press Enter.
        """
        self.get_infor(page)

        filename = self._csv_name(title)
        # 'with' guarantees the file is closed even if writing fails.
        with open(filename, 'w+') as f:
            f.writelines(self.androidbenchmark)

        # os.getcwd() returns bytes on Python 2; prefer the unicode variant
        # so a non-ASCII working directory cannot break the message.
        cwd = getattr(os, 'getcwdu', os.getcwd)()
        print(u'爬虫报告：文件' + filename + u'已经下载:' + cwd)
        print(u'按任意键退出...')
        raw_input()

    def get_infor(self, page=None):
        """Append the device names found in ``page`` to the result list.

        When ``page`` is None the page is downloaded from the stored URL.
        """
        if page is None:
            page = self._fetch()
        self.deal_Android_Device(page)

    def deal_Android_Device(self, page):
        """Append every ``phone.php?phone=<name>`` link target in ``page``.

        Names are stored exactly as they appear in the href attribute, each
        followed by a newline.
        """
        names = re.findall(r'<a href="phone\.php\?phone=(.*?)">', page, re.S)
        self.androidbenchmark.extend(name + "\n" for name in names)

#------------Program entry point----------------
# Start-up banner; the text is echoed to the console verbatim.
print(u"""#---------------------------------------
# 程序：程序：androidbenchmark爬虫
# 作者：ewang
# 日期：2016-7-7
# 语言：Python 2.7
# 功能：获取页面中的Android PassMark Rating信息保存到文件中。
#--------------------------------------------------
""")

# List the accepted chart names, then read one from the console; the site
# prefix doubles as the input prompt.
print(u'需要爬取得URL(passmark_chart,memmark_chart,cpumark_chart,diskmark_chart,g2dmark_chart,g3dmark_chart)：')
chart_name = str(raw_input(u'http://www.androidbenchmark.net/'))

# Full page URL = site prefix + chart name + ".html".
bdurl = 'http://www.androidbenchmark.net/' + chart_name + '.html'

# Scrape the selected chart page and save its device list.
Android_Device_Name = androidbenchmark_Spider(bdurl)
Android_Device_Name.AndroidBenchMark()
#coding=utf-8
#---------------------------------------
# 程序：iphonebenchmark爬虫
# 作者：ewang
# 日期：2016-7-11
# 语言：Python 2.7
# 功能：获取页面中的iphone PassMark Rating信息保存到文件中。
#---------------------------------------

import string
import urllib2
import re
import os

class iphonebenchmark_Spider:
    """Collect device names from one benchmark chart page.

    The page at ``url`` is downloaded, the target of every
    ``phone.php?phone=<name>`` link is extracted, and the names are written
    one per line to ``<page title>.csv`` in the current working directory.

    Written for Python 2.7 (``urllib2``, ``raw_input``).
    """

    def __init__(self, url):
        """Store the chart page URL and start with an empty result list."""
        # URL of the chart page to scrape.
        self.iphonebenchmarkUrl = url
        # Device names found so far, each terminated by a newline.
        self.iphonebenchmark = []
        print(u'爬虫，爬爬...')

    def iphoneBenchMark(self):
        """Download the page once, report its title and save the names."""
        Page = self._fetch()
        title = self.find_title(Page)
        print(u'网页名称:' + title)
        # Hand the downloaded page on so it is not fetched a second time.
        self.save_infor(title, Page)

    def _fetch(self):
        """Return the raw body of the chart page."""
        response = urllib2.urlopen(self.iphonebenchmarkUrl)
        # Python 2 responses are not context managers, hence try/finally.
        try:
            return response.read()
        finally:
            response.close()

    def find_title(self, page):
        """Return the text of the page's <title> element.

        The result is always text (never bytes) with surrounding whitespace
        removed. When the element is missing or empty a report is printed
        and the placeholder title is returned.
        """
        myTitle = re.search(r'<title>(.*?)</title>', page, re.S)
        # group(1) is the first (and only) capture group.
        title = myTitle.group(1) if myTitle else u''
        if isinstance(title, bytes):
            # Mixing undecoded non-ASCII bytes with the unicode messages
            # below would raise UnicodeDecodeError on Python 2.
            title = title.decode('utf-8', 'replace')
        title = title.strip()
        if not title:
            print(u'爬虫报告：无法加载网页标题...')
            return u'暂无标题'
        return title

    @staticmethod
    def _csv_name(title):
        """Turn a page title into a safe ``<title>.csv`` file name.

        The title comes from remote HTML, so path separators, characters
        that are invalid in Windows file names and control characters are
        replaced by ``_``; leading/trailing dots and spaces are dropped.
        """
        safe = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', title).strip(' .')
        return (safe or u'暂无标题') + '.csv'

    def save_infor(self, title, page=None):
        """Extract the device names and write them to ``<title>.csv``.

        ``page`` is the already-downloaded page source; when omitted the
        page is fetched from the stored URL. After writing, the method
        prints a report and waits for the user to press Enter.
        """
        self.get_infor(page)

        filename = self._csv_name(title)
        # 'with' guarantees the file is closed even if writing fails.
        with open(filename, 'w+') as f:
            f.writelines(self.iphonebenchmark)

        # os.getcwd() returns bytes on Python 2; prefer the unicode variant
        # so a non-ASCII working directory cannot break the message.
        cwd = getattr(os, 'getcwdu', os.getcwd)()
        print(u'爬虫报告：文件' + filename + u'已经下载:' + cwd)
        print(u'按任意键退出...')
        raw_input()

    def get_infor(self, page=None):
        """Append the device names found in ``page`` to the result list.

        When ``page`` is None the page is downloaded from the stored URL.
        """
        if page is None:
            page = self._fetch()
        self.deal_iphone_Device(page)

    def deal_iphone_Device(self, page):
        """Append every ``phone.php?phone=<name>`` link target in ``page``.

        Names are stored exactly as they appear in the href attribute, each
        followed by a newline.
        """
        names = re.findall(r'<a href="phone\.php\?phone=(.*?)">', page, re.S)
        self.iphonebenchmark.extend(name + "\n" for name in names)

#------------Program entry point----------------
# Start-up banner; the text is echoed to the console verbatim.
print(u"""#---------------------------------------
# 程序：程序：iphonebenchmark爬虫
# 作者：ewang
# 日期：2016-7-7
# 语言：Python 2.7
# 功能：获取页面中的iphone PassMark Rating信息保存到文件中。
#--------------------------------------------------
""")

# List the accepted chart names, then read one from the console; the site
# prefix doubles as the input prompt.
print(u'需要爬取得URL(passmark_chart,memmark_chart,cpumark_chart,diskmark_chart,g2dmark_chart,g3dmark_chart)：')
chart_name = str(raw_input(u'http://www.iphonebenchmark.net/'))

# Full page URL = site prefix + chart name + ".html".
bdurl = 'http://www.iphonebenchmark.net/' + chart_name + '.html'

# Scrape the selected chart page and save its device list.
iphone_Device_Name = iphonebenchmark_Spider(bdurl)
iphone_Device_Name.iphoneBenchMark()

【文章原创作者：香港服务器 http://www.558idc.com/hk.html提供,感谢支持】

上一篇：python列表解析实例(整数操作、字符操作、矩阵操作)
下一篇：没有了

androidbenchmark和iphonebenchmark这两页面中设备信息爬虫

相关文章