当前位置 : 主页 > 网络编程 > PHP >

python调库自动检测转码GBKToUTF8

来源:互联网 收集:自由互联 发布时间:2023-09-06
原来的文件只有 GBK、UTF-8 和 ASCII 三种编码,脚本自动检测 GBK 文件并转码成不带 BOM 的 UTF-8。题外话:Python 的格式经常乱 tab,就很烦。安装基础工具:apt-get install python3、apt-get install python3-pip、pip3 install chardet。目录结构:+./debug/gbk2u8.py +./src/


原来文件只有GBK,UTF8以及ASCII


自动检测GBK转码成不带BOM的UTF-8


题外话

Python的格式经常乱tab,就很烦

安装基础工具

apt-get install python3
apt-get install python3-pip
pip3 install chardet

目录结构(脚本放在 debug 目录下,转换上一级目录中的文件):+./debug/gbk2u8.py +./src/ +./inc/

cd debug
python3 gbk2u8.py


gbk2u8.py 文件代码


#-*- coding:utf-8 -*-
import os
import operator
import chardet

def strequal(a, b):
    """Return True when ``a`` and ``b`` have the same string representation.

    Both arguments are passed through ``str()`` before comparing, so a
    non-string value such as ``None`` can be compared against a string
    without raising.
    """
    return str(a) == str(b)
def getFileencode(filename):
    """Return the encoding name that chardet guesses for ``filename``.

    The whole file is read in binary mode and passed to ``chardet.detect``;
    the ``'encoding'`` entry of its result is returned unchanged.

    NOTE(review): chardet presumably reports ``None`` here when it cannot
    decide (for example on an empty file) -- confirm against the chardet
    documentation; callers should not assume a string.
    """
    with open(filename, 'rb') as f:
        detection = chardet.detect(f.read())
    return detection['encoding']
def fileGb2312ToUtf8(filename):
    """Rewrite ``filename`` in place as UTF-8 without a byte-order mark.

    The action depends on the encoding reported by ``getFileencode``
    (compared case-insensitively):

    * ``GB2312`` / ``GBK`` / ``GB18030``: the bytes are decoded as GB18030
      and written back encoded as UTF-8.
    * ``UTF-8-SIG``: the leading BOM is removed; every other byte,
      including line endings, is written back untouched.
    * anything else: the file is left alone and its name and reported
      encoding are printed.

    Raises:
        UnicodeDecodeError: if a file reported as GB* is not valid GB18030.
            The file has not been opened for writing at that point, so it
            is left unmodified.
    """
    utf8_bom = b'\xef\xbb\xbf'
    file_en = getFileencode(filename)
    # str() keeps the comparison safe when no encoding name was reported.
    normalized = str(file_en).upper()

    if normalized in ('GB2312', 'GBK', 'GB18030'):
        print(filename+"gbk ",file_en)
        with open(filename, 'rb') as src:
            content = src.read()
        # Decode *before* reopening for writing: opening with 'wb' truncates
        # the file, so a decode failure must happen while the data is intact.
        # GB18030 is used as the decoder because it also covers GB2312/GBK.
        new_content = content.decode('GB18030').encode('utf-8')
        with open(filename, 'wb') as dst:
            dst.write(new_content)
    elif normalized == 'UTF-8-SIG':
        with open(filename, 'rb') as src:
            content = src.read()
        # Work on raw bytes so that only the BOM is removed; a text-mode
        # round trip would also rewrite the file's line endings.
        if content.startswith(utf8_bom):
            with open(filename, 'wb') as dst:
                dst.write(content[len(utf8_bom):])
    else:
        print(filename+"other ",file_en)


def fileExtension(filename):
    """Return the extension of ``filename``, including the leading dot.

    This is the second element of ``os.path.splitext(filename)``; it is an
    empty string when the last path component has no extension.
    """
    _root, extension = os.path.splitext(filename)
    return extension



def isCodeFile(filename):
    """Return True when ``filename`` has an extension that should be converted.

    The extension is obtained from ``fileExtension`` and matched exactly
    (case-sensitively) against ``.h``, ``.c``, ``.ini``, ``.md`` and ``.sh``.
    """
    return fileExtension(filename) in ('.h', '.c', '.ini', '.md', '.sh')



def dirGb2312ToUtf8(dir):
    """Convert every matching file found under the directory ``dir``.

    The tree is traversed with ``os.walk``; each file accepted by
    ``isCodeFile`` is passed to ``fileGb2312ToUtf8`` using its joined path.

    The parameter name shadows the builtin ``dir()``; it is kept as-is so
    that existing callers passing it by keyword keep working.
    """
    for path, _dirs, names in os.walk(dir):
        for name in names:
            full_path = os.path.join(path, name)
            if isCodeFile(full_path):
                fileGb2312ToUtf8(full_path)


if __name__ == '__main__':
    # '../' is resolved against the current working directory, so the script
    # must be started from inside the directory that contains it.
    dirGb2312ToUtf8('../')
网友评论