有时候用电脑看代码不方便,正好有个 Kindle 在吃灰,于是整理了一些学习资料里的代码,没事的时候可以拿来读一读。
工具的部分源码来源于网络,经过我自己整理后适合自己使用,相信有些功能大家或许也用得到。具体的使用方法我会在下文说明,还有问题大家可以联系我。
文章包括以下内容,按可能的使用顺序介绍:
- 一 去除重复代码文件的工具
- 二 ANSI 编码转换为 UTF-8 编码的工具
- 三 代码转换为 PDF 的工具
- 四 代码转换为 Word 的工具
一 去重工具代码。因为用得少,代码没有时间多做整理,用的时候大家自己调整代码中的部分内容以适应自己的需求,能用就行。
-
这个去重工具判断的是文件的 MD5 值。使用场景:许多相同的文件分散在不同目录,直接做 code2pdf 或者 code2word 的话重复内容太多,没法看。如果你的代码没有这样的问题,就不需要执行这一步。
-
指定要处理的目录,例如 root_path = r".\aa"
-
suffix_list 定义的部分:大家需要看哪种代码,就把哪种代码的后缀放进去。
-
os.system("rm " + fileMap[xmd5]) 部分:大家如有需要,可根据自己的系统调整,或者写成 os.remove(fileMap[xmd5])。请慎重使用,默认是注释掉的。我这里是把命令打印并保存出来,自己确认后再手动执行:
print("\nrm " +fileMap[xmd5])
import hashlib
import os
# Root directory that is walked recursively.
root_path = r"e:\aa"
# Directory prefixes that are skipped while walking the tree.
exclude_path_list = [r"./build", ]
# File suffixes that are taken into account.
suffix_list = [r".cpp", r".c", r".java", ]
# NOTE: this second assignment replaces the list above, so only ".dart" is active.
suffix_list = [r".dart", ]
# Number of files hashed so far (incremented by main()).
count = 0
# Maps an MD5 hex digest to the path of a file with that digest (filled by main()).
fileMap={}
def check_file_suffix(file, suffix_list):
    """Return True when *file* ends with one of the suffixes in *suffix_list*.

    Args:
        file: File name or path to test.
        suffix_list: Iterable of suffix strings such as ".cpp".

    Returns:
        bool: True if any suffix matches, otherwise False (this includes
        an empty *suffix_list*).
    """
    # str.endswith accepts a tuple of candidates, so no explicit loop is needed.
    return file.endswith(tuple(suffix_list))
def check_path_exclude(path, exclude_path_list):
    """Return True when *path* starts with one of the excluded prefixes.

    Args:
        path: Directory path to test.
        exclude_path_list: Iterable of path prefixes to exclude.

    Returns:
        bool: True if *path* begins with any prefix, otherwise False (this
        includes an empty *exclude_path_list*).
    """
    # Plain string-prefix comparison; str.startswith accepts a tuple of candidates.
    return path.startswith(tuple(exclude_path_list))
def GetFileMd5(filename):
    """Return the MD5 hex digest of a file's content.

    Args:
        filename: Path of the file to hash.

    Returns:
        str: Hexadecimal MD5 digest, or None when *filename* is not an
        existing regular file.
    """
    if not os.path.isfile(filename):
        return None
    myhash = hashlib.md5()
    # The context manager closes the handle even if a read fails midway.
    with open(filename, 'rb') as f:
        # Hash in fixed-size chunks so large files are never loaded whole.
        for chunk in iter(lambda: f.read(8192), b''):
            myhash.update(chunk)
    return myhash.hexdigest()
def main():
    """Walk ``root_path`` and print an ``rm`` command for every redundant copy.

    Files are compared by the MD5 digest of their content. Directories
    matching ``exclude_path_list`` are skipped and only files matching
    ``suffix_list`` are hashed. Nothing is deleted here: the commands are
    printed so they can be reviewed and run by hand.

    Side effects:
        Increments the module-level ``count`` for every hashed file and
        records digests in the module-level ``fileMap``.
    """
    global count
    for path, subdirs, files in os.walk(root_path):
        if check_path_exclude(path, exclude_path_list):
            continue
        for name in files:
            file = os.path.join(path, name)
            if not check_file_suffix(file, suffix_list):
                continue
            xmd5 = GetFileMd5(file)
            if xmd5 is None:
                # Not a regular file (e.g. vanished or a broken link). Without
                # this guard all such entries would share the key None and be
                # reported as duplicates of each other.
                continue
            count += 1
            if xmd5 in fileMap:
                # To delete directly instead of printing, call
                # os.remove(fileMap[xmd5]) here. Left disabled on purpose.
                print("\nrm " + fileMap[xmd5])
            # Remember the most recent path: with N identical files the N-1
            # earlier ones are listed and the last one seen is kept.
            fileMap[xmd5] = file


if __name__ == '__main__':
    main()
执行结果如下
确认屏幕输出后保存为批处理文件,或者复制输出内容,直接打开控制台粘贴并回车,即可删除其中重复的文件。
二 ANSI 编码转换为 UTF-8 编码的工具
有时候会遇到有些源代码文件的编码不是 UTF-8,直接转换出来是乱码。如果有需要,您可以使用这个工具,它和上一个工具的结构差不多,配置部分也一样,参考上面的说明设置成适合你要求的即可。
代码如下
import os
import chardet
# Root directory that is walked recursively.
root_path = r"./dp"
# Directory prefixes that are skipped while walking the tree.
exclude_path_list = [r"./build", ]
# Suffixes of the files that get re-encoded.
suffix_list = [r".cpp", r".c", r".java", r".dart", ]
# Number of files converted so far (incremented by main()).
count = 0
def check_file_suffix(file, suffix_list):
    """Return True when *file* ends with one of the suffixes in *suffix_list*.

    Args:
        file: File name or path to test.
        suffix_list: Iterable of suffix strings such as ".cpp".

    Returns:
        bool: True if any suffix matches, otherwise False (this includes
        an empty *suffix_list*).
    """
    # str.endswith accepts a tuple of candidates, so no explicit loop is needed.
    return file.endswith(tuple(suffix_list))
def check_path_exclude(path, exclude_path_list):
    """Return True when *path* starts with one of the excluded prefixes.

    Args:
        path: Directory path to test.
        exclude_path_list: Iterable of path prefixes to exclude.

    Returns:
        bool: True if *path* begins with any prefix, otherwise False (this
        includes an empty *exclude_path_list*).
    """
    # Plain string-prefix comparison; str.startswith accepts a tuple of candidates.
    return path.startswith(tuple(exclude_path_list))
def main():
    """Re-encode every matching file below ``root_path`` as UTF-8, in place.

    Directories matching ``exclude_path_list`` are skipped and only files
    matching ``suffix_list`` are processed. The source encoding is guessed
    with ``chardet``. Files whose encoding cannot be detected or decoded
    are reported and left untouched.

    Side effects:
        Overwrites the converted files and increments the module-level
        ``count`` once per converted file.
    """
    global count
    for path, subdirs, files in os.walk(root_path):
        if check_path_exclude(path, exclude_path_list):
            continue
        for name in files:
            file = os.path.join(path, name)
            if not check_file_suffix(file, suffix_list):
                continue
            print(file)
            # Read the bytes once; they feed both detection and decoding.
            with open(file, 'rb') as f:
                raw = f.read()
            encoding = chardet.detect(raw)['encoding']
            if encoding is None:
                # chardet could not decide (e.g. empty file). Passing None to
                # open() would silently fall back to the locale codec.
                print("skipped, encoding not detected:", file)
                continue
            try:
                # Decoding happens before the file is opened for writing, so
                # a failure never truncates the original.
                s = raw.decode(encoding)
            except (UnicodeDecodeError, LookupError) as e:
                print("skipped, cannot decode as", encoding, ":", e)
                continue
            # newline='' writes the text back with its original line endings.
            with open(file, mode='w', encoding='utf-8', newline='') as f:
                f.write(s)
            count += 1
    print("共", count, "个文件,转化结束")


if __name__ == '__main__':
    main()
执行结果如下
三 代码转换为 PDF 的工具
代码没有上面那些问题、想转成 PDF 的,可以参考此工具。
- 参数设置:-dst "./aa" -out codeaa.pdf dart
- 其中 ./aa 为代码目录,codeaa.pdf 为生成的文件,dart 是代码后缀
import argparse
import os
import sys
from glob import glob
import html
from xhtml2pdf import pisa # import python module
# Maximum number of characters per line; longer source lines get a line
# break inserted when the snippet is assembled.
BREAK_LIMIT = 110
# NOTE(review): not referenced by the visible script; the -out argument
# carries its own "out.pdf" default.
output_filename = "out.pdf"
# Page skeleton. The %%%code%%% placeholder is replaced with the
# concatenated per-file sections before rendering.
MAIN_TEMPLATE = """
<html>
<style>
body{
font-family:STSong-Light;
}
.code {
border: 1px solid grey;
padding: 1px;
overflow: hide;
font-family:STSong-Light;
}
pre {
font-size:12px;
font-family:STSong-Light;
}
</style>
<body>
<h1>Code Listing</h1>
%%%code%%%
</body>
</html>"""
# One section per source file. %%%name%%% receives the file name and
# %%%snippet%%% the HTML-escaped file content.
CODE_TEMPLATE = """
<hr><div>
<h1>%%%name%%%</h1>
<div class="code">
<pre>%%%snippet%%%</pre>
</div>
</div>"""
# Accepted extensions are supplied on the command line (positional `ext` arguments).
def convert_html_to_pdf(source_html, output_filename):
    """Render *source_html* into a PDF file with xhtml2pdf.

    Args:
        source_html: HTML markup to convert.
        output_filename: Path of the PDF file to create or overwrite.

    Returns:
        The ``err`` attribute of the status object returned by
        ``pisa.CreatePDF``.
    """
    # The context manager closes the output file even if CreatePDF raises.
    with open(output_filename, "w+b") as result_file:
        pisa_status = pisa.CreatePDF(
            source_html,       # the HTML to convert
            dest=result_file)  # file handle to receive the result
    return pisa_status.err
def _wrap_long_line(line, limit):
    """Break *line* into pieces of at most *limit* characters.

    The line terminator (if any) is kept at the end and is not counted
    against the limit.
    """
    body = line.rstrip("\r\n")
    if len(body) <= limit:
        return line
    ending = line[len(body):]
    pieces = [body[i:i + limit] for i in range(0, len(body), limit)]
    return "\n".join(pieces) + ending


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Convert files in destination into a pdf')
    parser.add_argument('-dst',
                        help='folder containing your source files',
                        required=True)
    parser.add_argument('-out',
                        help='name of pdf to output',
                        default="out.pdf")
    parser.add_argument('ext',
                        help='list of file extensions to parse',
                        nargs="*")
    args = parser.parse_args()

    # os.path.join copes with an empty -dst, a trailing separator and
    # Windows-style paths, none of which the manual "/" handling did.
    files_grabbed = []
    for ext in args.ext:
        pattern = os.path.join(args.dst, "**", "*." + ext)
        # Sorted so the listing order does not depend on the file system.
        files_grabbed.extend(sorted(glob(pattern, recursive=True)))

    sections = []
    for file in files_grabbed:
        with open(file, 'r', encoding='UTF-8') as f:
            # Expand tabs first so the width limit applies to what is
            # displayed, then wrap as many times as needed.
            wrapped = [_wrap_long_line(line.expandtabs(4), BREAK_LIMIT)
                       for line in f]
        contents = "".join(wrapped)
        # Show the path relative to -dst; escape it as it goes into HTML.
        display_name = os.path.relpath(file, args.dst or os.curdir)
        sections.append(
            CODE_TEMPLATE
            .replace("%%%name%%%", html.escape(display_name))
            .replace("%%%snippet%%%", html.escape(contents)))

    pisa.showLogging()
    err = convert_html_to_pdf(
        MAIN_TEMPLATE.replace("%%%code%%%", "".join(sections)), args.out)
    if err:
        # Surface conversion failures through the exit status.
        sys.exit(1)
- 执行结果
4.生成文件
5.存在的问题和缺陷:生成的文件在电脑等设备上看没什么问题,但你的电子书阅读器不一定支持其中的编码方式,仍可能出现乱码。解决办法有以下几个:
- 用 PDF 编辑工具打开,另存为图片格式,缺点是文件较大
- 用专业的 PDF 工具打开,调整编码相关的选项,具体请自行查询
- 用 PDF 编辑工具打开,Ctrl+A 全选后 Ctrl+C 复制,新建 Word 文档 Ctrl+V 粘贴并保存,然后通过邮件发送到 Kindle。
四 代码转换为 Word 的工具
如果不喜欢 PDF 格式,可以直接转换成 Word 文档,使用下面的工具即可,设置部分如下:
-
命令参数:-s .\dp* -m "deep" -t e:\testjava.docx -i "java|yml"
-
.\dp* 代码途径,设置为你想要的
-
e:\testjava.docx 生成的文件
-
“java|yml” 代码的后缀
import os, sys, getopt
import codecs
from docx import Document
# Single python-docx document that convert()/walkflat()/walkdeep() append to;
# it is saved once at the end of the script.
doc = Document()
# Paths of files that raised while being read or added; printed after saving.
errorlist = []
def convert(dir, mode='flat', title=None, include=None, exclude=None, encoding='utf-8'):
    """Append the source files found under *dir* to the module-level document.

    Args:
        dir: Directory to scan.
        mode: 'flat' delegates to walkflat(), 'deep' delegates to
            walkdeep() starting at heading level 2. Any other value only
            prints a message.
        title: Optional level-1 heading added before the files.
        include: Optional '|'-separated extensions (without dot) to accept.
        exclude: Optional '|'-separated extensions (without dot) to reject.
        encoding: Text encoding used to read the files.
    """
    print('copy from diretory: ' + dir)
    if title is not None:
        doc.add_heading(title, 1)

    # None means "no filter"; otherwise split into the list of extensions.
    inc = include.split('|') if include is not None else None
    exc = exclude.split('|') if exclude is not None else None

    if mode == 'flat':
        walkflat(dir, inc, exc, encoding)
    elif mode == 'deep':
        walkdeep(dir, 2, inc, exc, encoding)
    else:
        print('mode is invaild')
def walkflat(dir, inc, exc, encoding):
    """Add every matching file below *dir* to the document, grouped by folder.

    Each folder (path relative to *dir*) gets a level-2 heading the first
    time one of its files is added; each file gets a level-3 heading, its
    content as a paragraph, and a page break. Files directly inside *dir*
    get no folder heading.

    Args:
        dir: Root directory to walk (bottom-up).
        inc: List of accepted extensions without dot, or None for all.
        exc: List of rejected extensions without dot, or None for none.
        encoding: Text encoding used to read the files.

    Side effects:
        Appends to the module-level ``doc``; paths that fail are appended
        to the module-level ``errorlist``.
    """
    currentdir = ''
    for root, dirs, files in os.walk(dir, False):
        # Folder path relative to the scan root, always with '/' separators.
        # relpath is independent of a trailing separator on *dir*.
        thisdir = os.path.relpath(root, dir).replace('\\', '/')
        if thisdir == '.':
            thisdir = ''
        for file in files:
            ext = os.path.splitext(file)[1][1:]
            if inc is not None and ext not in inc:
                continue
            if exc is not None and ext in exc:
                continue
            filepath = os.path.join(root, file).replace('\\', '/')
            try:
                # Built-in open() normalizes line endings, so CRLF files do
                # not carry stray '\r' characters into the document.
                with open(filepath, encoding=encoding) as f:
                    content = f.read()
                if currentdir != thisdir:
                    currentdir = thisdir
                    doc.add_heading(thisdir, 2)
                    print('into directory ' + thisdir)
                doc.add_heading(file, 3)
                doc.add_paragraph(content)
                doc.add_page_break()
                print('copied ' + file)
            except Exception as e:
                # Deliberately broad: one unreadable or unencodable file
                # must not abort the whole run; it is logged and reported.
                errorlist.append(filepath)
                print('read ' + filepath + ' error')
                print(str(e))
def walkdeep(root, level, inc, exc, encoding):
    """Recursively add files below *root*, mirroring folder depth in headings.

    Every file gets a heading at *level*, its content as a paragraph, and a
    page break. Every sub-directory gets a heading at *level* and is then
    processed with ``level + 1``. Sub-directories are only entered while
    ``level < 9``; deeper ones are reported and skipped.

    Args:
        root: Directory to list.
        level: Heading level used for the entries of *root*.
        inc: List of accepted extensions without dot, or None for all.
        exc: List of rejected extensions without dot, or None for none.
        encoding: Text encoding used to read the files.

    Side effects:
        Appends to the module-level ``doc``; paths that fail are appended
        to the module-level ``errorlist``.
    """
    # Sorted so the document order does not depend on the file system.
    for file in sorted(os.listdir(root)):
        filepath = os.path.join(root, file).replace('\\', '/')
        if os.path.isfile(filepath):
            ext = os.path.splitext(file)[1][1:]
            if inc is not None and ext not in inc:
                continue
            if exc is not None and ext in exc:
                continue
            try:
                # Built-in open() normalizes line endings, so CRLF files do
                # not carry stray '\r' characters into the document.
                with open(filepath, encoding=encoding) as f:
                    content = f.read()
                doc.add_heading(file, level)
                doc.add_paragraph(content)
                doc.add_page_break()
                print('copied ' + file)
            except Exception as e:
                # Deliberately broad: one bad file must not abort the run.
                errorlist.append(filepath)
                print('read ' + filepath + ' error')
                print(str(e))
        elif os.path.isdir(filepath):
            if level < 9:
                doc.add_heading(file, level)
                print('into directory ' + file)
                walkdeep(filepath, level + 1, inc, exc, encoding)
            else:
                # Previously dropped without any notice.
                print('skip directory ' + filepath + ' (nested too deep)')
if __name__ == '__main__':
    # Command-line entry point: parse the options, convert the source
    # directory (or every directory matched by a single '*'), save the
    # document, and list the files that could not be added.
    src = None
    mode = 'flat'
    target = None
    include = None
    exclude = None
    encoding = 'utf-8'
    # Double quotes outside so the literal 'flat|deep' can be shown as is.
    myhelp = ("run.py -s <source directory path> -m 'flat|deep' -t <target docx file path> "
              "-i <include extension of scanned files> -e <exclude extension of scanned files> "
              "-c <encoding of the files>")
    argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(
            argv, 'hs:m:t:i:e:c:',
            ['source=', 'mode=', 'target=', 'include=', 'exclude=', 'encoding='])
    except getopt.GetoptError:
        # Unknown option or missing argument: show usage and stop.
        print(myhelp)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(myhelp)
            sys.exit(2)
        elif opt in ('-s', '--source'):
            src = arg
        elif opt in ('-m', '--mode'):
            mode = arg
        elif opt in ('-t', '--target'):
            target = arg
        elif opt in ('-i', '--include'):
            include = arg
        elif opt in ('-e', '--exclude'):
            exclude = arg
        elif opt in ('-c', '--encoding'):
            encoding = arg
    if src is None or target is None:
        print('source and target is needed')
        sys.exit(2)

    pos = src.find('*')
    if pos == -1:
        convert(src, mode=mode, include=include, exclude=exclude, encoding=encoding)
    else:
        # "<prefix>*<suffix>": every entry of <prefix> replaces the '*'.
        presrc = src[0:pos]
        suffix = src[pos + 1:]
        # An empty prefix means the current directory.
        for entry in sorted(os.listdir(presrc or '.')):
            candidate = presrc + entry + suffix
            if not os.path.isdir(candidate):
                # Plain files and non-matching names cannot be scanned.
                continue
            convert(candidate, mode=mode, title=entry, include=include,
                    exclude=exclude, encoding=encoding)

    doc.save(target)
    print('\nfinish copying, your document is saved into "'+target+'" , thanks for your using!')
    if len(errorlist) != 0:
        print('\nerror file list:\n')
        for e in errorlist:
            print(e)
- 执行结果
6. 生成文件截图示例
7.缺点是目前只适用于 10 级以内的目录结构
以上程序的运行环境为 Windows 10 + PyCharm 社区版
以上内容针对懂一点 Python 的同学。如果需要 PC 版本,可以私信我;如果需要的人多,或许可以考虑做一个。
- 代码放置 gitee.com/gammergr/co…
- 同步到 github.com/gammergr/co…