特色栏目： python 批处理 net编程 Javascript Php Asp Css Html5 Android seo centos

Python从门到精通(三):高级操作-01-迭代

来源：互联网 收集：自由互联 发布时间：2022-06-24

一、手动创建迭代器 1.1、基础 # 推荐这种写法，而不要用异常来写 with open ( '/etc/passwd' ) as f : while True : line = next ( f , None ) if not line : break print ( line , end = '' ) num_list = [ 1 , 2 , [ 3 , 4 , [

一、手动创建迭代器

1.1、基础

# Preferred form: rely on the default value of next() to detect the end of
# the file instead of catching StopIteration.
with open('/etc/passwd') as f:
    while True:
        line = next(f, None)
        if line is None:  # sentinel returned once the file is exhausted
            break
        print(line, end='')

from collections.abc import Iterable


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf values of an arbitrarily nested iterable, depth-first.

    Args:
        items: The (possibly nested) iterable to walk.
        ignore_types: Iterable types that are yielded as a single value
            instead of being descended into. Strings and bytes are excluded
            by default because iterating them would recurse forever.

    Yields:
        Each non-iterable (or ignored-type) element, in left-to-right order.
    """
    for item in items:
        if isinstance(item, Iterable) and not isinstance(item, ignore_types):
            yield from flatten(item, ignore_types)
        else:
            yield item


num_list = [1, 2, [3, 4, [5, 6], 7], 8]
for x in flatten(num_list):
    print(f'number flatten: {x}')

# Another way to drive an iterator by hand
num_list = [1, 2, 3]
items = iter(num_list)
# Pull one value per element with next(); the loop variable is not needed,
# so iterate the list directly instead of range(len(...)).
for _ in num_list:
    print(f'first items next is:{next(items)}')

# Stopping by catching StopIteration
def manual_iter():
    """Print every line of /etc/passwd by calling next() on the file by hand.

    The loop has no exit condition of its own; it ends when next() raises
    StopIteration, which is caught and ignored.
    """
    with open('/etc/passwd') as f:
        try:
            while True:
                print(next(f), end='')
        except StopIteration:
            # Raised by next() once the file has no more lines.
            pass


manual_iter()

# Iterating together with the index
test_list = ['a', 'b', 'c']
# enumerate() accepts an optional start value: enumerate(test_list, 1) would
# number the items from 1 instead of the default 0.
for idx, str_val in enumerate(test_list):
    print(f'index is: {idx}, str value is: {str_val}')

1.2、实现迭代协议（__iter__方法）

迭代协议要求对象的__iter__()方法返回一个特殊的迭代器对象，这个迭代器对象实现了__next__()方法，并通过抛出StopIteration异常来标识迭代完成。

class Node:
    """Tree node holding a value and an ordered list of child nodes."""

    def __init__(self, value):
        """Create a node wrapping *value* with no children."""
        self._value = value
        self._children = []

    def __repr__(self):
        return f'Node({self._value})'

    def add_child(self, node):
        """Append *node* to this node's children."""
        self._children.append(node)

    def __iter__(self):
        """Return an iterator over the direct children only."""
        # Delegating to the list's own iterator is enough to make the node
        # usable in a for loop.
        return iter(self._children)

    def depth_first(self):
        """Yield this node, then all descendants, in depth-first pre-order."""
        yield self
        for c in self:
            yield from c.depth_first()

if __name__ == '__main__':
    # Build a small tree: 0 -> (1 -> (3, 4), 2 -> (5)).
    root = Node(0)
    child_1 = Node(1)
    child_2 = Node(2)
    root.add_child(child_1)
    root.add_child(child_2)
    child_1.add_child(Node(3))
    child_1.add_child(Node(4))
    child_2.add_child(Node(5))

    # Walk the whole tree depth-first, starting at the root.
    for ch in root.depth_first():
        print(f'child is: {ch}')

1.3、反向迭代

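迭代协议要求对象的__iter__()方法返回一个特殊的迭代器对象，这个迭代器对象实现了__next__()方法，并通过抛出StopIteration异常来标识迭代完成。反向迭代则通过内置函数reversed()完成：对象需要实现__reversed__()方法，或者是大小确定的序列（实现了__len__()和__getitem__()）。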

1.3.1、确定好大小的集合

# Reverse iteration only works on collections whose size is known up front.
a = [1, 2, 3, 4]
b = []
for x in reversed(a):  # reversed() is the built-in that does the work
    b.append(x)

1.3.2、不确定好大小的集合

# When the size is not known in advance the data can be copied into a list
# first, at the cost of holding all of it in memory.
# The with block closes the file, which the bare open() call never did.
with open('/etc/passwd') as f:
    for line in reversed(list(f)):
        print(line, end='')

# Implementing __reversed__ lets reversed() work without first copying the
# data into a list, which avoids the memory cost.
class Countdown:
    """Iterable that counts from ``start`` down to 1."""

    def __init__(self, start):
        """Remember the number the countdown starts from."""
        self.start = start

    def __iter__(self):
        """Forward iterator: yield start, start - 1, ..., 1."""
        n = self.start
        while n > 0:
            yield n
            n -= 1

    def __reversed__(self):
        """Reverse iterator: yield 1, 2, ..., start."""
        n = 1
        while n <= self.start:
            yield n
            n += 1

# reversed() dispatches to Countdown.__reversed__, so this counts upwards.
for rev_val in reversed(Countdown(20)):
    print(f'reversed order: {rev_val}')
# Plain iteration uses Countdown.__iter__ and counts downwards.
for nor_val in Countdown(20):
    print(f'normal order: {nor_val}')

二、迭代对象的操作

内部含有__iter__方法的对象都是可迭代对象。

num_list = [1, 3, 5, 7, 9]
# A starred target soaks up however many items are left over, and it is
# always bound to a list; compare with the output below.
head, *rest = num_list
# Prints: head is:1, rest list is:[3, 5, 7, 9]
print(f'head is:{head}, rest list is:{rest}')

2.1、保留最后N个元素-deque

这个相当于固定大小的管道

from collections import deque

def search(lines, search_val, history=1):
    """Yield each line containing *search_val* with the lines that preceded it.

    Args:
        lines: Iterable of strings to scan.
        search_val: Substring to look for in each line.
        history: Maximum number of preceding lines to keep.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is the
        deque of up to *history* lines seen before the matching line. The
        same deque object is yielded every time and keeps changing as the
        scan continues, so copy it if it must be kept.
    """
    # A deque with maxlen drops its oldest item when a new one is appended
    # to a full deque.
    previous_lines = deque(maxlen=history)
    for line in lines:
        if search_val in line:
            # Yielding decouples the search from the code that consumes the
            # results.
            yield line, previous_lines
        previous_lines.append(line)

# NOTE (original author): pay attention to the syntax used here.
if __name__ == '__main__':
    with open(r'test_file/element.txt') as f:
        # Each result is unpacked into the matching line and its history.
        for search_v, prev_lines in search(f, 'python', 2):
            for pre_line in prev_lines:
                print(pre_line, end='')
            print(f'search value is:{search_v}')

2.2、查找最大和最小的N个元素-heapq

import heapq
num_list = [1, 33, 3, 18, 7, -5, 18, 33, 51, -60, 5]
# Pick the three largest and the three smallest values, then show them.
three_largest = heapq.nlargest(3, num_list)
three_smallest = heapq.nsmallest(3, num_list)
print(three_largest)
print(three_smallest)

# Despite its name this is a list of offer records, one dict per company.
offer_dict = [
    {'company_name': 'IBM', 'stock': 80, 'price': 81.1},
    {'company_name': 'AAPL', 'stock': 60, 'price': 113.22},
    {'company_name': 'FB', 'stock': 150, 'price': 91.09},
    {'company_name': 'HPQ', 'stock': 30, 'price': 79.75},
    {'company_name': 'YHOO', 'stock': 50, 'price': 85.35},
    {'company_name': 'ACME', 'stock': 100, 'price': 76.65},
]


def _price_of(offer):
    """Return the price of one offer record (used as the ranking key)."""
    return offer['price']


# The three offers with the lowest price, cheapest first.
cheapest = heapq.nsmallest(3, offer_dict, key=_price_of)
print(cheapest)

2.3、记录分组-groupby

from operator import itemgetter
from itertools import groupby

done_record = [
    {'done': 'read book', 'date': '07/01/2020'},
    {'done': 'work', 'date': '07/04/2020'},
    {'done': 'family chat', 'date': '07/02/2020'},
    {'done': 'run', 'date': '07/03/2020'},
    {'done': 'sport', 'date': '07/02/2020'},
    {'done': 'read 20 pages', 'date': '07/02/2020'},
    {'done': 'run 5km', 'date': '07/01/2020'},
    {'done': 'sport 2 hours', 'date': '07/04/2020'},
]

# Sort by the grouping field first (in place; the dates compare as strings).
done_record.sort(key=itemgetter('date'))
# groupby() only merges runs of consecutive equal keys, which is why the
# records must already be sorted by the same key.
for date, items in groupby(done_record, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

# Alternative approach: bucket the records by date so that the records of a
# single date can be looked up directly afterwards.
from collections import defaultdict

record_by_date = defaultdict(list)
for record in done_record:
    record_by_date[record['date']].append(record)

for record in record_by_date['07/01/2020']:
    print(record)

2.4、排序

sort()和sorted()的区别在于：前者会就地修改原列表的内容，后者不会修改原列表，而是返回一个新的已排序列表。所以需要根据场景有选择地使用。

class User:
    """Minimal record type identified by ``user_id``."""

    def __init__(self, user_id):
        """Store the identifier used for sorting and display."""
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'
if __name__ == "__main__":
    users = [User(23), User(3), User(99)]
    print(users)  # [User(23), User(3), User(99)]

    # Custom sort, option 1: a lambda as the key function.
    print(sorted(users, key=lambda u: u.user_id))  # [User(3), User(23), User(99)]

    # Custom sort, option 2 (recommended): operator.attrgetter.
    from operator import attrgetter
    print(sorted(users, key=attrgetter('user_id')))  # [User(3), User(23), User(99)]

    # attrgetter also accepts several attribute names for a multi-key sort,
    # e.g. attrgetter('last_name', 'first_name').

    # The same key function works with min() and max().
    print(min(users, key=attrgetter('user_id')))  # User(3)
    print(max(users, key=attrgetter('user_id')))  # User(99)

2.5、命名切片

其实就是把条件定义为一个变量，以便重用

record_str = '......##............20 ....#..1513.5 ........##'
# Give each fixed-width field a named slice instead of hard-coding the
# offsets at the point of use.
NUMBERS = slice(20, 22)
PRICE = slice(30, 36)
quantity = int(record_str[NUMBERS])
unit_price = float(record_str[PRICE])
total_cost = quantity * unit_price
# Prints: total cost is:30270.0
print(f'total cost is:{total_cost}')

# A slice object echoes back the values it was built with; print them one
# per line to inspect the parameters.
split_obj = slice(3, 20, 2)
print(split_obj.start, split_obj.stop, split_obj.step, sep='\n')

str_obj = 'HelloWorld'
split_obj = slice(3, 20, 2)
# indices() clips the slice to the length of the string and returns a
# (start, stop, step) tuple that can be unpacked straight into range().
for i in range(*split_obj.indices(len(str_obj))):
    print(str_obj[i])

上一篇：Python从门到精通(三):高级操作-03-yield关键字
下一篇：没有了