本文是多年前学习编程时, 在一位网友程序的基础之上改写而成的, 采用Python语言编写, 实现了多线程下载功能, 可以有效弥补Linux下原有下载工具的一些不足, 以下给出具体代码。
本文是多年前学习编程时, 在一位网友程序的基础之上改写而成的, 采用Python语言编写, 实现了多线程下载功能, 可以有效弥补Linux下原有下载工具的一些不足, 以下给出具体代码。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Devilmaycry
# Email:
# 本程序是多年前参照他人代码所写,并在原基础上做了一定改进,因时间过久已无法给出原出处,特此声明
# 测试平台 Ubuntu 14.04 X86_64 Python 2.7.6
import threading
import urllib2
import sys
import time
# Global upper bound on the number of download threads; main() clamps the
# requested thread count to this value.
max_thread = 20
# Re-entrant lock shared by all download threads; it is held around each
# seek+write pair on the shared output file object.
lock = threading.RLock()
class Downloader(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a shared file.

    Each worker requests ``bytes=start_size-end_size`` (both inclusive) with
    an HTTP ``Range`` header and writes the received data into ``fobj`` at the
    matching offset.  All workers share one file object (and therefore one
    file position), so every seek+write pair is done while holding the
    module-level ``lock``.

    Args:
        url: Address of the resource to download.
        start_size: Offset of the first byte this worker is responsible for.
        end_size: Offset of the last byte (inclusive) of this worker's range.
        fobj: Writable, seekable binary file object shared by all workers.
        buffer: Maximum number of bytes requested from the network per read.
    """

    def __init__(self, url, start_size, end_size, fobj, buffer):
        threading.Thread.__init__(self)
        self.url = url
        self.buffer = buffer
        self.start_size = start_size
        self.end_size = end_size
        self.fobj = fobj
        # Ordinal of the next failed attempt; shown in the failure message.
        self.i = 1

    def run(self):
        """Thread entry point: announce the worker, then fetch its range."""
        print('starting: %s' % self.getName())
        self._download()

    def _download(self):
        """Fetch this worker's range, resuming after every failure.

        On any error the request is re-issued starting at the first byte that
        has not been written yet, so data already on disk is never fetched
        twice.  Retries are unlimited, as in the original design.
        """
        while True:
            # Everything before ``offset`` has already been written.
            offset = self.start_size
            if offset > self.end_size:
                # Nothing (left) to fetch.  Sending an inverted range such as
                # "bytes=10-9" must be avoided: servers ignore an invalid
                # Range header and answer with the whole resource.
                print('%s done.' % self.getName())
                return

            response = None
            try:
                req = urllib2.Request(self.url)
                # The Range header restricts the response to this worker's
                # still-missing bytes.
                req.headers['Range'] = 'bytes=%s-%s' % (offset, self.end_size)
                response = urllib2.urlopen(req, timeout=15)

                # When the server declares the body length we know exactly
                # where this response must end.  That lets us tell a finished
                # transfer from a dropped connection, because read() returns
                # an empty string in both cases.
                declared = response.info().get('Content-Length')
                if declared is None:
                    expected_end = None
                else:
                    expected_end = offset + int(declared)

                while expected_end is None or offset < expected_end:
                    block = response.read(self.buffer)
                    if not block:
                        if expected_end is not None:
                            # Premature EOF: raise so the except clause
                            # resumes from ``offset`` instead of silently
                            # leaving a hole in the output file.
                            raise IOError(
                                'connection closed at byte %d, expected %d'
                                % (offset, expected_end))
                        break
                    with lock:
                        # Position and write atomically with respect to the
                        # other workers sharing the file object.
                        self.fobj.seek(offset)
                        self.fobj.write(block)
                    offset += len(block)

                print('%s done.' % self.getName())
                return
            except Exception:
                # Top-level boundary of the worker thread: report the failure
                # and retry from the first byte that is still missing.
                print(self.getName() + " failed time " + "=====" * 10 + str(self.i) + "\n")
                self.i += 1
                self.start_size = offset
            finally:
                # Always release the connection, on success and on retry.
                if response is not None:
                    response.close()
def _split_ranges(size, parts):
    """Split ``size`` bytes into at most ``parts`` inclusive (start, end) ranges.

    The ranges are contiguous, start at 0 and end at ``size - 1``; the last
    one absorbs the remainder of the division.  Fewer than ``parts`` ranges
    are returned when ``size`` is smaller than ``parts`` (no range is ever
    empty), and an empty list is returned for ``size <= 0``.
    """
    if size <= 0:
        return []
    parts = max(1, min(parts, size))
    avg_size = size // parts
    ranges = []
    for i in range(parts):
        start_size = i * avg_size
        if i == parts - 1:
            # Byte offsets are zero-based, so the last valid offset is
            # size - 1 (not size).
            end_size = size - 1
        else:
            end_size = start_size + avg_size - 1
        ranges.append((start_size, end_size))
    return ranges


def main(url, threadNum=30, save_file='', buffer=1024):
    """Download ``url`` into ``save_file`` using parallel range requests.

    The total size is taken from the ``Content-Length`` header of an initial
    request; the byte range is then divided among the worker threads, which
    all write into one shared output file.

    Args:
        url: Address of the resource to download.
        threadNum: Requested number of worker threads; clamped to the range
            1..``max_thread``.
        save_file: Path of the output file (created or truncated).
        buffer: Maximum number of bytes each worker reads per network call.

    Raises:
        IndexError: If the server does not send a ``Content-Length`` header.
    """
    # The thread count must be at least 1 and at most max_thread.
    threadNum = max(1, min(threadNum, max_thread))

    # Ask the server for the total size; only the headers are needed, so the
    # connection is closed without reading the body.
    probe = urllib2.urlopen(url)
    try:
        size = int(probe.info().getheaders('Content-Length')[0])
    finally:
        probe.close()

    # The with-block guarantees the output file is closed even if starting or
    # joining a worker raises.
    with open(save_file, 'wb') as fobj:
        plist = [
            Downloader(url, start_size, end_size, fobj, buffer)
            for start_size, end_size in _split_ranges(size, threadNum)
        ]
        # Start all workers, then wait for every one of them to finish.
        for t in plist:
            t.start()
        for t in plist:
            t.join()
    print('Download completed!')
if __name__ == '__main__':
    # Usage: script [URL [OUTPUT_FILE]]
    # Without arguments the built-in sample URL is downloaded to "test".
    # An earlier sample target was:
    # http://dlsw.baidu.com/sw-search-sp/soft/3a/12350/QQ7.1.14509.0.1429585990.exe
    default_url = "https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz"
    url = sys.argv[1] if len(sys.argv) > 1 else default_url
    save_file = sys.argv[2] if len(sys.argv) > 2 else 'test'

    start = time.time()
    main(url=url, threadNum=20, save_file=save_file, buffer=4096)
    end = time.time()
    # Elapsed wall-clock time in seconds.
    print(end - start)