当前位置 : 主页 > 编程语言 > python >

Python从入门到精通(一):基础-04-集合类型

来源:互联网 收集:自由互联 发布时间:2022-06-27
一、tuple 1.1、不具名 #不具名,可以整体重新定义,不能按索引这样的方式单独赋值 dimensions = ( 5 , 10 ) print ( dimensions ) #(5, 10) dimensions = ( 5 , 15 ); print ( dimensions ) #(5, 15) 1.2、具名 #具名

一、tuple

1.1、不具名

# A plain (unnamed) tuple variable can be rebound to a whole new tuple,
# but its individual items cannot be reassigned through an index.
dimensions = (5, 10)
print(dimensions)  # (5, 10)
dimensions = (5, 15)
print(dimensions)  # (5, 15)

1.2、具名

#具名
from collections import namedtuple

class Point:
    """Hand-written 3-D point holding ``x``, ``y`` and ``z`` attributes.

    Shown for comparison with the ``namedtuple`` declaration of the same
    three fields.
    """

    def __init__(self, x, y, z):
        """Store the three coordinates on the instance."""
        self.x = x
        self.y = y
        self.z = z
# The namedtuple below declares the same three fields as the class above in
# a single line (the resulting type is an immutable tuple subclass).
Point = namedtuple("Point", "x y z")
point = Point(1, 2, 3)
print(point.x)  # 1

# List every field name.
print(Point._fields)

# Build an instance from an existing sequence with the "_make" classmethod.
user = Point._make(['Runoob', 'male', 12])

# Namedtuples cannot be modified in place: "_replace" returns a new instance
# with the given fields changed, so the result is rebound to the same name.
user = user._replace(x=22)
print(user.x)  # 22

二、list

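list是可变的动态数组,没有固定的默认大小,会随着元素的增加自动扩容。使用insert(i, x)添加元素时,索引超出范围不会报错:索引小于-len(list)时插入到索引0的位置,索引大于等于len(list)时添加到最后一位;其他负数索引表示从末尾倒数的位置(例如insert(-1, x)会插入到最后一个元素之前)。最后一个元素index==-1,所以索引可以是正的也可以是负的。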

# NOTE: the list is named `letters` rather than `str` so that the builtin
# `str()` used by later snippets is not shadowed.
letters = ["a", "b", "c"]
squars = list(range(5))

squars[0:4]  # slicing returns a sub-list, here [0, 1, 2, 3]
squars1 = squars[:]  # a full slice makes a (shallow) copy of the whole list
# List comprehension: builds the list in a single expression.
squars = [va ** 2 for va in range(10)]  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

2.1、解包(unpacking)

# Tuple unpacking: bind each item to its own variable.
num_tup = (1, 2)
x, y = num_tup
print(f'x is:{x}, y is:{y}')  # x is:1, y is:2

# ----------------------------------------------
# The number of targets must match the number of items; otherwise the
# assignment raises ValueError and none of the targets is rebound.
num_tup = (1, 2)
try:
    x, y, z = num_tup
except ValueError as ex:
    # ex: not enough values to unpack (expected 3, got 2)
    print(f'出错了,出错原因:{ex}')

# ---------------------------------------------
# Unpacking a list of mixed objects.
obj_list = ['abc', 10, 22.2, (2020, 3, 15)]
str_obj, int_obj, float_obj, tuple_obj = obj_list
print(f'tuple_obj is:{tuple_obj}')  # tuple_obj is:(2020, 3, 15)

# ---------------------------------------------
# Nested unpacking: the trailing tuple is split into its own three names.
str_obj, int_obj, float_obj, (year, month, day) = obj_list
print(f'year is:{year}, month is:{month}, day is:{day}')  # year is:2020, month is:3, day is:15

2.2、利用set去重

def dedupe_1(items, key=None):
    """Yield the items of *items* with duplicates removed, in first-seen order.

    Args:
        items: Iterable of values to de-duplicate.
        key: Optional one-argument function mapping an item to the hashable
            value used for the duplicate check. When omitted, the items
            themselves are used and therefore must be hashable.

    Yields:
        Each item whose (keyed) value has not been produced before.
    """
    seen = set()
    for item in items:
        marker = item if key is None else key(item)
        if marker not in seen:
            seen.add(marker)
            yield item


sequence_v = [1, 2, 3, 5, 2, 3]
print(list(dedupe_1(sequence_v)))  # [1, 2, 3, 5]

2.3、统计

这个需要用到collections库。

from collections import Counter
words = [
    'python', 'c++', 'abc', 'php', 'mysql', 'java', 'c#', '.net',
    'ruby', 'lisp', 'python', 'python', 'mongodb', 'hive', 'spark', 'go', 'vb',
    'java', "python", 'c', 'ios', 'sql', 'python', 'java', 'c++',
    'hbase', 'go', "java", 'c++'
]
word_counts = Counter(words)
frequency_num = 2
# The frequency_num most frequent words, most common first.
top_words = word_counts.most_common(frequency_num)
print(f'出现频率最高的{frequency_num}个单词是:{top_words}')
# prints: 出现频率最高的2个单词是:[('python', 5), ('java', 4)]

print(f"python出现频率:{word_counts['python']}")  # 5
print(f"go出现频率:{word_counts['go']}")  # 2

# Counts can be incremented by hand, one word at a time ...
more_words = ['python', 'java', 'go']
for word in more_words:
    word_counts[word] += 1

print(f"python出现频率:{word_counts['python']}")  # 6
print(f"go出现频率:{word_counts['go']}")  # 3

# ... or in bulk with update(), which adds the counts of a whole iterable.
more_words = ['python', 'java', 'python']
word_counts.update(more_words)
print(f"python出现频率:{word_counts['python']}")  # 8
print(f"go出现频率:{word_counts['go']}")  # 3

# Counters support arithmetic. Expected contents of the objects below
# (every word not listed has a count of 1):
#   a: python 5, java 4, c++ 3, go 2
#   b: python 2, java 1
#   c = a + b: python 7, java 5, c++ 3, go 2
#   d = a - b: python 3, java 3, c++ 3, go 2
a_obj = Counter(words)
b_obj = Counter(more_words)
print(f'the object of a is:{a_obj}')
print(f'the object of b is:{b_obj}')

c_obj = a_obj + b_obj
print(f'the object of c is:{c_obj}')

d_obj = a_obj - b_obj
print(f'the object of d is:{d_obj}')

2.4、过滤

exp_list = [1, 4, -5, 10, -7, 2, 3, -1]
# List comprehensions build the complete filtered list in memory at once.
print([n for n in exp_list if n > 0])  # [1, 4, 10, 2, 3]
print([n for n in exp_list if n < 0])  # [-5, -7, -1]

# A generator expression yields the matching items one at a time instead,
# which avoids holding a second full list when the input is large.
pos_items = (n for n in exp_list if n > 0)
for item in pos_items:
    print(item)

"""利用filter函数创建一个迭代器"""
# filter() returns a lazy iterator over the items the predicate accepts.
val_list = ['1', '2', '-3', '-', '4', 'N/A', '5']


def is_int(val):
    """Return True when ``int(val)`` succeeds, False otherwise.

    Args:
        val: Candidate value, typically a string.

    Returns:
        bool: False when ``int()`` raises ValueError (e.g. ``'N/A'``) or
        TypeError (e.g. ``None``); True otherwise.
    """
    try:
        int(val)
    except (ValueError, TypeError):
        return False
    return True


new_val_list = list(filter(is_int, val_list))
print(new_val_list)  # ['1', '2', '-3', '4', '5']

"""过滤时替换值"""
import math

# Transform the values that pass the filter.
positive_roots = [math.sqrt(n) for n in exp_list if n > 0]
print(positive_roots)
# Replace instead of dropping: values that fail the test become 0.
non_negative = [n if n > 0 else 0 for n in exp_list]
print(non_negative)
non_positive = [n if n < 0 else 0 for n in exp_list]
print(non_positive)

"""compress适用于用另一个序列来过滤当前序列的情况"""
from itertools import compress

# compress() filters one sequence using a parallel sequence of booleans.
done_work = [
    'read book', 'running', 'work', 'basketball',
    'table tennis', 'bike', 'read 20 pages', 'running 5km',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]

# One selector flag per entry: True where the count exceeds 5.
more5 = [count > 5 for count in counts]
print(more5)
print(list(compress(done_work, more5)))  # ['work', 'bike', 'read 20 pages']

2.5、计算

# Sum of squares, fed to sum() through a generator expression.
num_list = [1, 2, 3, 4, 5]
square_total = sum(value * value for value in num_list)
print(square_total)  # 55


import os

# 'dirname' is a placeholder path; fall back to an empty listing when it is
# not an existing directory so the example reports "no python" instead of
# raising FileNotFoundError.
dir_path = 'dirname'
file_list = os.listdir(dir_path) if os.path.isdir(dir_path) else []
# any() stops at the first file name that ends with '.py'.
if any(name.endswith('.py') for name in file_list):
    print('There be python!')
else:
    print('Sorry, no python.')

# Render a tuple as one comma-separated line.
course = ('python', 20, 0.3)
print(','.join(map(str, course)))

# Reduce over a single field of a list of records.
course_info = [
    {'name': 'python', 'score': 100.0},
    {'name': 'java', 'score': 85.0},
    {'name': 'c', 'score': 90.0},
    {'name': 'c++', 'score': 95.0},
]
min_score = min(record['score'] for record in course_info)
print(min_score)  # 85.0


# Passing the generator expression as an explicit, parenthesised object ...
squares = (x * x for x in num_list)
print(sum(squares))
# ... and the tidier form where the redundant parentheses are dropped.
print(sum(x * x for x in num_list))


# Same total via a list comprehension, which first builds a temporary list.
num_list = [1, 2, 3, 4, 5]
print(sum([x * x for x in num_list]))


# min() over a generator returns only the lowest score; min() with key=
# returns the whole record that holds the lowest score.
lowest_score = min(cf['score'] for cf in course_info)
print(lowest_score)
lowest_record = min(course_info, key=lambda cf: cf['score'])
print(lowest_record)

三、dict

3.1、原生实现

3.1.1、定义

alien_0 = {"color": "green", "points": 5}
color = alien_0["color"]  # indexing a missing key raises KeyError
print(alien_0["color"])  # green

alien_0["color"] = "red"  # assigning to an existing key replaces its value
print(alien_0["color"])  # red

alien_0["age"] = 15  # assigning to a new key adds the entry
del alien_0["age"]  # remove the entry again

for k, v in alien_0.items():  # iterate over (key, value) pairs
    print(f"{k}-{v}")

for k in alien_0.keys():  # iterate keys; sorted(alien_0.keys()) gives sorted order
    print(k)

for v in alien_0.values():  # iterate values; set(alien_0.values()) drops duplicates
    print(v)

3.1.2、统计

对dict做min、max、sorted等运算时,通常针对的是value,所以可以先用zip()函数把key和value反转成(value, key)元组再运算;但要注意zip()返回的是一个只能遍历一次的迭代器。

course_score = {
    '高等代数': 100.0,
    '算法与数据结构': 92.0,
    '编译原理': 88.5,
    '数学分析': 97.5,
    '统计学原理': 90.5,
}

# Build (score, course) pairs so that tuple comparison orders by score first.
min_score = min((score, name) for name, score in course_score.items())
print(f'最低得分课程及得分:{min_score[1]} {min_score[0]}')
max_score = max((score, name) for name, score in course_score.items())
print(f'最高得分课程及得分:{max_score[1]} {max_score[0]}')

# The same pairs, sorted from lowest to highest score.
score_sorted = sorted((score, name) for name, score in course_score.items())
print(score_sorted)

# zip() returns a one-shot iterator: this first pass works, after which the
# iterator is exhausted (a second min()/max() over it would raise ValueError).
score_and_course = zip(course_score.values(), course_score.keys())
print(min(score_and_course))

# min()/max() on the dict itself compare the keys only.
print(min(course_score))  # 数学分析
print(max(course_score))  # 高等代数

# On .values() they return the score but not the course it belongs to.
print(min(course_score.values()))  # 88.5
print(max(course_score.values()))  # 100.0

# With key= they return the course name that has the lowest / highest score.
print(min(course_score, key=lambda k: course_score[k]))  # 编译原理
print(max(course_score, key=lambda k: course_score[k]))  # 高等代数

3.1.3、key操作

keys()和items()返回的视图支持集合操作(如&、-、|);需要注意的是values()不支持集合操作(因为值可能重复),如果需要对values()做集合操作,可以先转化为集合(set)再进行操作。

a_dict = {'x': 1, 'y': 2, 'z': 3}
b_dict = {'w': 10, 'x': 11, 'y': 2}

# Keys present in both dicts: {'x', 'y'}
shared_keys = a_dict.keys() & b_dict.keys()
print(shared_keys)
# Keys of a_dict that are missing from b_dict: {'z'}
only_in_a = a_dict.keys() - b_dict.keys()
print(only_in_a)
# (key, value) pairs that are identical in both dicts: {('y', 2)}
shared_items = a_dict.items() & b_dict.items()
print(shared_items)

# Copy of a_dict without the unwanted keys, built from a key-set difference.
c = {name: a_dict[name] for name in a_dict.keys() - {'z', 'w'}}
print(c)  # c contains 'x': 1 and 'y': 2

3.1.4、value操作

score_dict = {
    'math': 95.0,
    'java': 90.5,
    'python': 100.0,
    'sql': 93.0,
    'english': 75.5,
}

# Keep only the entries scoring above 92, using a dict comprehension.
p1 = {subject: score for subject, score in score_dict.items() if score > 92}
print(p1)  # {'math': 95.0, 'python': 100.0, 'sql': 93.0}

# Keep only the entries whose key is one of the tech skills.
tech_names = {'python', 'sql', 'java'}
p2 = {subject: score for subject, score in score_dict.items() if subject in tech_names}
print(p2)  # {'java': 90.5, 'python': 100.0, 'sql': 93.0}

# Same result as the first p1, built by passing (key, value) tuples to dict().
# NOTE: the dict comprehension is normally the faster of the two spellings;
# this form is shown only as an alternative.
p1 = dict((subject, score) for subject, score in score_dict.items() if score > 92)

3.2、dict

3.2.1、defaultdict

from collections import defaultdict

# defaultdict(list): a missing key is created with an empty list on first access.
default_dict = defaultdict(list)
default_dict['a'].append(1)

# defaultdict(set): same idea, but each key collects unique values.
default_dict = defaultdict(set)
default_dict['a'].add(1)

# Plain-dict equivalent: setdefault() inserts the default only when the key
# is absent and returns the stored value either way.
d_dict = {}
d_dict.setdefault('a', []).append(1)

3.2.2、orderedDict(排序)

单字段排序:在需要按插入顺序序列化时就需要用到有序字典。Python 3.7之前普通dict不保证顺序(从3.7起dict已保证保持插入顺序),可以用OrderedDict类;这个类内部会额外维护一个双向链表,所以其占用内存大约是普通dict的两倍。

from collections import OrderedDict

# OrderedDict iterates its keys in the order they were first inserted.
ordered_dict = OrderedDict()
ordered_dict['a'] = 1
ordered_dict['d'] = 'abc'
ordered_dict['c'] = 'hello world'
for key, value in ordered_dict.items():
    print(f'get key is:{key}, value is:{value}')

3.2.3、ChainMap(运算)

ChainMap并不是真的把dict合并了:它不会复制数据,只是保存了对各个原dict的引用,并按顺序依次查找;如果有重复的key,则取第一个包含该key的dict中的value。对ChainMap的写入、更新和删除只会作用于第一个dict。

from collections import ChainMap

a_dict = {'x': 1, 'z': 3}
b_dict = {'y': 2, 'z': 4}

# Lookups search a_dict first and fall back to b_dict.
c_dict = ChainMap(a_dict, b_dict)
print(c_dict['x'])  # 1, found in a_dict
print(c_dict['y'])  # 2, found in b_dict
print(c_dict['z'])  # 3, a_dict's 'z' shadows b_dict's

print(len(c_dict))  # 3 distinct keys
print(list(c_dict.keys()))
print(list(c_dict.values()))


# Writes and deletes always go to the first mapping (a_dict).
c_dict['z'] = 10
c_dict['w'] = 40
del c_dict['x']
print(a_dict)  # {'z': 10, 'w': 40}
# Deleting 'y' would raise KeyError because 'y' is not in the first mapping:
# del c_dict['y']

"""可用来定义常量类"""
# Layered scopes: start with one mapping holding x == 1 ...
val_dict = ChainMap()
val_dict['x'] = 1

# ... then push two more layers in front; each new_child() adds a fresh
# mapping, so the assignment shadows 'x' without touching older layers.
for layer_value in (2, 3):
    val_dict = val_dict.new_child()
    val_dict['x'] = layer_value
print(val_dict)  # ChainMap({'x': 3}, {'x': 2}, {'x': 1})
print(val_dict['x'])  # 3

# .parents drops the front layer, revealing the value beneath: 2, then 1.
for _ in range(2):
    val_dict = val_dict.parents
    print(val_dict['x'])

print(val_dict)  # ChainMap({'x': 1})

a_dict = {'x': 1, 'z': 3}
b_dict = {'y': 2, 'z': 4}

# dict.update() can stand in for ChainMap, but it builds an independent
# copy: later changes to the source dicts are not reflected in it.
dict_merge = dict(b_dict)
dict_merge.update(a_dict)
for name in ('x', 'y', 'z'):
    print(dict_merge[name])  # 1, 2, 3

a_dict['x'] = 10
print(dict_merge['x'])  # still 1: the merged copy is detached from a_dict

# A ChainMap keeps references to the originals, so it sees later changes.
chain_dict = ChainMap(a_dict, b_dict)
print(chain_dict['x'])  # 10

a_dict['x'] = 20
print(chain_dict['x'])  # 20

四、enum

4.1、定义

from enum import Enum

class BugStatus(Enum):
    """Bug life-cycle states; each member binds a name to an integer value."""

    new = 7
    incomplete = 6
    invalid = 5
    wont_fix = 4
    in_progress = 3
    fix_committed = 2
    fix_released = 1


# Prints "Member name: wont_fix" (preceded by a blank line from the "\n").
print('\nMember name: {}'.format(BugStatus.wont_fix.name))

也可以用函数式API在程序中动态创建枚举,但这种写法不直观,不太建议使用。

import enum

# Functional API: `value` is the name of the new enum class and `names`
# lists its (member name, member value) pairs.
BugStatus = enum.Enum(
    value='BugStatus',
    names=[
        ('new', 7),
        ('incomplete', 6),
        ('invalid', 5),
    ],
)

print('All members:')
for status in BugStatus:
    print('{:15} = {}'.format(status.name, status.value))

4.1.1、唯一值

枚举成员在python中是以值为准的:如果两个名字的值重复了,后定义的名字只是先定义成员的别名,被认为是同一个成员,迭代时不会单独出现。在这个例子中不会打印 incomplete

import enum
class BugStatus(enum.Enum):
    """Enum in which two names share the value 7.

    ``incomplete`` repeats the value of ``new``, so it becomes an alias of
    ``new`` rather than a separate member.
    """

    new = 7
    incomplete = 7


# Iteration skips aliases, so only "new = 7" is printed.
for item in BugStatus:
    print(f"{item.name} = {item.value}")

使用 @enum.unique 装饰器可以强制枚举值唯一,值重复时会在定义类时抛出 ValueError:

# @enum.unique rejects aliases: defining the class below raises ValueError
# because by_design repeats the value of wont_fix and closed repeats the
# value of fix_released. The error is caught and printed so that the rest of
# the script keeps running.
try:
    @enum.unique
    class BugStatus(enum.Enum):
        """Bug states containing two deliberately duplicated values."""

        new = 7
        incomplete = 6
        invalid = 5
        wont_fix = 4
        in_progress = 3
        fix_committed = 2
        fix_released = 1

        # This will trigger an error with unique applied.
        by_design = 4
        closed = 1
except ValueError as err:
    # Keep a reference: `err` itself is unbound when the except block ends.
    unique_error = err
    print(f'ValueError: {err}')

4.1.2、创建元组类型的枚举

import enum
class BugStatus(enum.Enum):
    """Bug states whose values are ``(num, transitions)`` tuples.

    Each member's tuple is unpacked into ``__init__``, so every member
    exposes ``num`` and ``transitions`` as attributes while ``value`` stays
    the whole tuple.
    """

    new = (7, ['incomplete',
               'invalid',
               'wont_fix',
               'in_progress'])
    incomplete = (6, ['new', 'wont_fix'])
    fix_released = (1, ['new'])

    def __init__(self, num, transitions):
        """Store the numeric code and the names of the reachable states."""
        self.num = num
        self.transitions = transitions

    def can_transition(self, new_state):
        """Return True if *new_state*'s name is listed in ``self.transitions``."""
        return new_state.name in self.transitions


print('Name:', BugStatus.incomplete)  # Name: BugStatus.incomplete
print('Value:', BugStatus.incomplete.value)  # Value: (6, ['new', 'wont_fix'])
print('Custom attribute:', BugStatus.incomplete.transitions)  # Custom attribute: ['new', 'wont_fix']
print('Using attribute:',
      BugStatus.incomplete.can_transition(BugStatus.new))  # Using attribute: True


class BugStatus(enum.Enum):
    """Bug states whose values are dicts with 'num' and 'transitions' keys.

    Each member's dict is passed to ``__init__`` as a single argument and its
    two entries are copied onto the member as attributes.
    """

    new = {
        'num': 7,
        'transitions': [
            'incomplete',
            'invalid',
            'wont_fix',
            'in_progress',
        ],
    }
    in_progress = {
        'num': 3,
        'transitions': ['new', 'fix_committed'],
    }
    fix_released = {
        'num': 1,
        'transitions': ['new'],
    }

    def __init__(self, vals):
        """Copy the 'num' and 'transitions' entries of *vals* onto the member."""
        self.num = vals['num']
        self.transitions = vals['transitions']

    def can_transition(self, new_state):
        """Return True if *new_state*'s name is listed in ``self.transitions``."""
        return new_state.name in self.transitions


print('Name:', BugStatus.in_progress)
print('Value:', BugStatus.in_progress.value)
print('Custom attribute:', BugStatus.in_progress.transitions)
print('Using attribute:',
      BugStatus.in_progress.can_transition(BugStatus.new))

4.2、操作

4.2.1、迭代

# Iterating an Enum class yields its members in definition order; the name
# is left-aligned in a 15-character column.
for status in BugStatus:
    print('{:15} = {}'.format(status.name, status.value))

4.2.2、比较

import enum

class BugStatus(enum.Enum):
    """Two-member enum used to demonstrate equality comparison."""

    wont_fix = 4
    fix_released = 1


actual_state = BugStatus.wont_fix
desired_state = BugStatus.fix_released
desired_state1 = BugStatus.wont_fix

# Equality: False True True
print(
    'Equality:',
    actual_state == desired_state,
    actual_state == desired_state1,
    actual_state == BugStatus.wont_fix,
)

4.2.3、排序

按值排序,然后输出name

import enum
class BugStatus(enum.IntEnum):
    """IntEnum members compare like ints, so sorted() can order them by value."""

    in_progress = 3
    fix_committed = 2
    fix_released = 1


print('Ordered by value:')
# One member name per line, lowest value first.
print('\n'.join(' ' + s.name for s in sorted(BugStatus)))
网友评论