python调库自动检测转码GBKToUTF8
发布时间:2023-09-10 11:56:22 152 相关标签:
原来文件只有GBK,UTF8以及ASCII
自动检测GBK转码成不带BOM的UTF-8
题外话
Python的格式经常乱tab,就很烦
安装基础工具
apt-get install python3
apt-get install python3-pip
pip3 install chardet
+./debug/gbk2u8.py
+./src/
+./inc/
gbk2u8.py 文件代码
#-*- coding:utf-8 -*-
import os
import operator
import chardet
def strequal(a, b):
    """Return True when ``a`` and ``b`` have the same string form.

    Both arguments are passed through ``str()`` before comparing, so
    non-string values (for example ``None``) are compared by their textual
    representation instead of raising. The comparison is case-sensitive.
    """
    return str(a) == str(b)
def getFileencode(filename):
    """Detect the encoding of ``filename`` using chardet.

    The whole file is read as bytes and passed to ``chardet.detect``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of chardet's result.
        NOTE(review): chardet's documentation says this can be ``None`` when
        no encoding is recognised - callers should not assume a string.
    """
    with open(filename, 'rb') as f:
        raw = f.read()
    # Detection runs after the file is closed; it only needs the bytes.
    return chardet.detect(raw)['encoding']
def fileGb2312ToUtf8(filename):
    """Rewrite ``filename`` in place as UTF-8 without a byte-order mark.

    What happens depends on the encoding reported by ``getFileencode``:

    * ``GB2312``: the bytes are decoded with the GB18030 codec and written
      back encoded as UTF-8.
    * ``UTF-8-SIG``: the content is re-encoded as plain UTF-8, which drops
      the leading BOM.
    * anything else: the file is left untouched and its name and detected
      encoding are printed.

    Args:
        filename: Path of the file to convert.

    Raises:
        UnicodeDecodeError: If the bytes cannot be decoded with the codec
            chosen for the detected encoding. The file is not modified in
            that case because decoding happens before it is reopened for
            writing.
    """
    file_en = getFileencode(filename)
    if strequal(file_en, 'GB2312'):
        print(filename+"gbk ", file_en)
        with open(filename, 'rb') as src:
            content = src.read()
        new_content = content.decode('GB18030').encode('utf-8')
        with open(filename, 'wb') as dst:
            dst.write(new_content)
    elif strequal(file_en, 'UTF-8-SIG'):
        # Stay in binary mode: a text-mode round trip would also translate
        # line endings, changing more than just the BOM.
        with open(filename, 'rb') as src:
            content = src.read()
        new_content = content.decode('UTF-8-SIG').encode('utf-8')
        with open(filename, 'wb') as dst:
            dst.write(new_content)
    else:
        print(filename+"other ", file_en)
def fileExtension(filename):
    """Return the extension of ``filename``, including the leading dot.

    This is the second element of ``os.path.splitext(filename)``, so a name
    without an extension yields an empty string.
    """
    _root, extension = os.path.splitext(filename)
    return extension
def isCodeFile(filename):
    """Return True when ``filename`` has an extension this tool converts.

    Accepted extensions are ``.h``, ``.c``, ``.ini``, ``.md`` and ``.sh``.
    The match is case-sensitive, so ``FOO.C`` is not accepted.
    """
    extension = os.path.splitext(filename)[1]
    return extension in ('.h', '.c', '.ini', '.md', '.sh')
def dirGb2312ToUtf8(dir):
    """Convert every matching file below ``dir`` via ``fileGb2312ToUtf8``.

    The tree is walked recursively with ``os.walk``. Each file accepted by
    ``isCodeFile`` is passed to ``fileGb2312ToUtf8``.

    Args:
        dir: Root directory to scan. The name shadows the builtin ``dir``
            but is kept so that existing keyword callers keep working.
    """
    # Gather all paths first so the walk has finished before any file is
    # rewritten.
    targets = [
        os.path.join(path, name)
        for path, _dirs, names in os.walk(dir)
        for name in names
    ]
    for filename in targets:
        if isCodeFile(filename):
            fileGb2312ToUtf8(filename)
# Script entry point: process the tree rooted at '../'. The path is relative,
# so it is resolved against the current working directory at run time.
dirGb2312ToUtf8('../')
文章来源: https://blog.51cto.com/datrilla/5885645
特别声明:以上内容(图片及文字)均为互联网收集或者用户上传发布,本站仅提供信息存储服务!如有侵权或有涉及法律问题请联系我们。
举报