当前位置 : 主页 > 编程语言 > python >

Python从门到精通(五):文件处理-02-CSV文件处理

来源:互联网 收集:自由互联 发布时间:2022-06-27
处理CSV文件需要用到csv库,如果需要做数据分析还需要引入pandas库。 一、读处理 --course.csv-- CourseName,Score,Date,Time,Finished,TotalClass python,0.5,6/1/2020,8:30am,3,30 java,0.2,6…

处理CSV文件需要用到csv库;如果还要做数据分析,则需要引入pandas库。

一、读处理

--course.csv--
CourseName,Score,Date,Time,Finished,TotalClass
python,0.5,6/1/2020,8:30am,3,30
java,0.2,6/1/2020,9:30am,2,20
php,0.25,6/1/2020,10:30am,5,10

import csv
# Read course.csv row by row and pick columns by position.
# newline='' hands line-ending handling to the csv module, as its
# documentation requires for files passed to csv.reader.
with open('course.csv', newline='') as f:
    f_csv = csv.reader(f)
    header_list = next(f_csv)  # consume the header row so the loop sees data rows only
    for row in f_csv:
        # Accessing columns by numeric index is error-prone.
        print(f'course name: {row[0]}, total class: {row[5]}')

from collections import namedtuple

# Read course.csv into named tuples so columns are accessed by header name.
with open('course.csv', newline='') as f:
    f_csv = csv.reader(f)
    headings = next(f_csv)
    # Prefer named fields over numeric indexes: build the tuple type from
    # the header row (the headers must be valid Python identifiers).
    Row = namedtuple('Row', headings)
    for r in f_csv:
        row = Row(*r)
        print(f'course name: {row.CourseName}, total class: {row.TotalClass}')
# Read each data row into a dict keyed by the header names.
import csv

with open('course.csv', newline='') as f:
    f_csv = csv.DictReader(f)  # the first row supplies the dict keys
    for row in f_csv:
        print(f'dict read: {row}')

二、写处理

import csv
from collections import namedtuple

# ======== Write a CSV file ========
header_list = ['CourseName', 'Score', 'Date', 'Time', 'Finished', 'TotalClass']
row_list = [
    ('python', 0.5, '6/1/2020', '8:30am', 3, 30),
    ('java', 0.2, '6/1/2020', '9:30am', 2, 20),
    ('php', 0.25, '6/1/2020', '10:30am', 5, 10),
]

# newline='' is required when writing: the csv writer emits its own '\r\n'
# row terminator, and without it platforms that translate '\n' (Windows)
# end up with an extra blank line after every row.
with open('course.csv', 'w', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(header_list)   # header first
    f_csv.writerows(row_list)     # then every data row in one call

# Write the same data from a list of dicts.
header_list = ['CourseName', 'Score', 'Date', 'Time', 'Finished', 'TotalClass']
row_list = [
    {'CourseName': 'python', 'Score': 0.5, 'Date': '6/1/2020',
     'Time': '8:30am', 'Finished': 3, 'TotalClass': 30},
    {'CourseName': 'java', 'Score': 0.2, 'Date': '6/1/2020',
     'Time': '9:30am', 'Finished': 2, 'TotalClass': 20},
    {'CourseName': 'php', 'Score': 0.25, 'Date': '6/1/2020',
     'Time': '10:30am', 'Finished': 5, 'TotalClass': 10},
]

# newline='' keeps the csv writer's own '\r\n' terminator from being
# translated a second time (which shows up as blank rows on Windows).
with open('course.csv', 'w', newline='') as f:
    # header_list fixes the column order used for every dict.
    f_csv = csv.DictWriter(f, header_list)
    f_csv.writeheader()
    f_csv.writerows(row_list)

# Handle a comma-separated file by splitting each line by hand.
# NOTE: a plain split does not understand quoted fields that contain commas;
# csv.reader handles those cases.
with open('course.csv') as f:
    for line in f:
        # Drop the line terminator first, otherwise the last field keeps a
        # trailing '\n'.
        row_list = line.rstrip('\r\n').split(',')
        print(f'split row: {row_list}')
# Handle a tab-separated file: pass delimiter='\t' to csv.reader.
# NOTE(review): course.csv as written earlier in this script is
# comma-separated, so each row comes back here as a single field; point this
# at a real tab-separated file to see the columns split.
with open('course.csv', newline='') as f:
    f_tsv = csv.reader(f, delimiter='\t')
    for row in f_tsv:
        print(f'read row: {row}')

# Handle headers that are not valid Python identifiers.
import re

with open('course.csv', newline='') as f:
    f_csv = csv.reader(f)
    # Replace every character that is not an ASCII letter or underscore with '_'.
    header_list = [re.sub(r'[^a-zA-Z_]', '_', h) for h in next(f_csv)]
    # rename=True swaps any name that is still illegal after the substitution
    # (leading underscore, duplicate, keyword) for a positional name such as
    # '_0' instead of raising ValueError.
    Row = namedtuple('Row', header_list, rename=True)
    for r in f_csv:
        row = Row(*r)
        print(f'named tuple read: {row}')


# One converter per column, in file order; csv.reader yields every field as str.
col_types = [str, float, str, str, int, int]
with open('course.csv', newline='') as f:
    f_csv = csv.reader(f)
    header_list = next(f_csv)  # skip the header row, which must not be converted
    for row in f_csv:
        # Pair each field with the converter for its column and apply it.
        row = tuple(convert(value) for convert, value in zip(col_types, row))
        print(f'row read: {row}')

三、列格式处理

# Everything read from a CSV file is a string; any other type has to be
# converted by hand.
print('Reading as dicts with type conversion')
# Only the listed columns are converted; the rest stay as strings.
# 'Finished' holds whole-number counts in the sample data (3, 2, 5), so it is
# parsed as int rather than float.
field_types = [
    ('Score', float),
    ('Finished', int),
    ('TotalClass', int),
]

with open('course.csv', newline='') as f:
    for row in csv.DictReader(f):
        # Overwrite the selected string values with their converted form.
        row.update((key, conversion(row[key]))
                   for key, conversion in field_types)
        print(f'update row: {row}')

四、数据统计

import pandas

# Load the data set, dropping the trailing line of the file. The keyword is
# spelled skipfooter (the old skip_footer spelling was removed from pandas)
# and is only supported by the Python parser engine.
rats = pandas.read_csv('rats.csv', skipfooter=1, engine='python')
print(rats)

# Distinct values of the activity column.
print(rats['Current Activity'].unique())
# Keep only the rows where a crew was dispatched.
crew_dispatched = rats[rats['Current Activity'] == 'Dispatch Crew']
print(len(crew_dispatched))
# The ten most frequent ZIP codes among those rows.
print(crew_dispatched['ZIP Code'].value_counts()[:10])
# Group the dispatched rows by completion date.
dates = crew_dispatched.groupby('Completion Date')
print(len(dates))

# Number of rows per completion date.
date_counts = dates.size()
# A bare expression prints nothing when run as a script, so print it.
print(date_counts[0:10])
# Series.sort() no longer exists; sort_values() returns a new Series sorted
# in ascending order, so the last ten entries are the largest counts.
date_counts = date_counts.sort_values()
print(date_counts[-10:])
网友评论