【Python】open&zip&print | 郭飞的笔记

open

可以open文本文件和二进制文件

x=open("data.txt", encoding='utf-8', mode="r")
for i in x:
    print(i) # 相当于x.readline()

open

f=open(...)

读写模式

r:只读，如果文件不存在，则输出错误
w:只写，如果文件不存在，则创建
a:只写，在末尾追加写入
x:只写，如果文件存在，则报错
r+：读写

b代表二进制，t代表文本模式（默认）

rb:只读二进制，如果文件不存在，则输出错误
wb:只写二进制，如果文件不存在，则自动创建
ab:附加到二进制文件末尾

f.read() # 返回字符串，内容是游标后到文件末尾的全部内容
f.read(100) # 返回游标之后的100个字符（没有100个字符则返回实际行数）
f.readline() # 返回下一行内容的字符串,结尾返回空字符串
f.realines() # 返回list，每项是字符串，含\n
#readlines([size])
for i in f:print(i) # i是str(如果是b模式open的，是bytes)
list(f) # [i for i in f]

f.seek(0) # 设置游标位置到0，游标位置
#f.seek(off, whence) where为0表示从文件开始算起,1表示从当前位置算,2表示从文件结尾算，off可正可负.只有b模式，才允许1,2.
f.tell() # 返回当前的游标位置编号

f.write(str) # 把文本或二进制写入文件。并不立即写入文件，而是close时写入文件。不会自动加换行符
f.writelines(<list>) # 把list写入文件，不会在元素之间自动加逗号、空格等
f.close() # 写入数据会在这一步实际写入到硬盘中

flush() # 清空I/O缓存，并把数据强行写回磁盘

注1：用readline取数后，下面的for in语句中，将不会从文件头开始，因为游标已经移动

对话框IO

str_1 = input("Enter a string: ")

zip

新建压缩包

import zipfile

# 新建压缩包
zip1 = zipfile.ZipFile('filename.zip', 'w')
# w: 如果 zip 文件存在，则覆盖。a: 如果 zip 文件存在，后续的写入方式是追加

# 将bb件夹（及其下aa.txt）添加到压缩包,压缩算法LZMA
zip1.write('path/file1.txt', compress_type=zipfile.ZIP_LZMA)

# writestr会新建文件夹和文件，并把data写入到文件中，data可以是str或者是byte。
# 这里是新建一个bb文件夹，其下再新建一个cc.txt,将hello world写入到文本中
zip1.writestr('path2/file_c.txt', data='Hello World', compress_type=zipfile.ZIP_DEFLATED)

zip1.close()

文件信息

import zipfile

zip1 = zipfile.ZipFile('filename.zip')

zip1.namelist()  # 列出所有文件
zip1.filename  # zip 文件名，'filename.zip'
zip1.filelist # zip 所打包的文件列表
zip1.pwd
zip1.compression
zip1.compresslevel

file_info = zip1.getinfo('path/file1.txt')
file_info.file_size, file_info.compress_size  # 压缩前大小，压缩后大小

读入内容到内存

# read出来的是 byte，decode成utf-8即可
content = zip1.read('path/file1.txt').decode('utf-8')

解压

# 解压单个文件
zip1.extract('path/file1.txt')

# 全解压
zip1.extractall()
zfile.extractall(pwd=b'123') # 密码正确则解压，密码不正确则报错

额外：不用文件名，而是 mem = io.BytesIO()，则可以把zip文件放到内存中，而不是磁盘中

mem = io.BytesIO()
zip1 = zipfile.ZipFile(mem, 'w')
zip1.writestr('path2/file_c.txt', data='Hello World', compress_type=zipfile.ZIP_DEFLATED)
zip1.close()

# 如何把内存中的zip文件写到磁盘
with open('file2.zip', 'wb') as f:
    mem.seek(0)
    f.write(mem.read())

如何改一个 zip 文件的注释，然后把它存下来

from zipfile import ZipFile

with ZipFile("example.zip", 'a') as zipf:
    zipf.comment = "更改 zip 文件的注释".encode("utf-8")

with ZipFile("example.zip", 'r') as zipf:
    print(zipf.comment.decode())

mac 和 linux 可以用 zipnote example.zip 来查看注释

print

print(*objects, sep=' ', end='\n', file=sys.stdout, flush=False)
# objects -- 一个或多个，待 print 的对象
# sep -- 用来间隔多个对象，默认 空格。
# end -- 用来设定以什么结尾。默认 '\n'
# file -- 要写入的文件对象。默认 sys.stdout
# flush -- 与 file 相关，为 True 则立即写入

f = open('a.txt', 'a')
print('1', '2', '3', file=f, sep=',')
f.close()

print多彩字(仅限 Linux)

import sys

def clear_traces(): # 清除一行
    # 直接用 print 也可以
    sys.__stdout__.write('\033[2K\r')


def hide_cursor(): # 隐藏光标
    sys.__stdout__.write('\033[?25l')


def show_cursor(): # 显示光标
    sys.__stdout__.write('\033[?25h')


def go_up(n=20): # 光标向上移动 n 行
    up_command = '\033[{}A'.format(n)
    print(up_command)


def flush():
    sys.__stdout__.flush()


# 打印多色字体
# "格式为 \033[{字体}{字色}{背景色} 文字部分 \033[0m"
# 字体: 0（默认）、1（高亮）、22（非粗体）、4（下划线）、24（非下划线）、 5（闪烁）、25（非闪烁）、7（反显）、27（非反显）
# 字色: 30（黑色）、31（红色）、32（绿色）、 33（黄色）、34（蓝色）、35（洋 红）、36（青色）、37（白色）
# 背景色: 40（黑色）、41（红色）、42（绿色）、 43（黄色）、44（蓝色）、45（洋 红）、46（青色）、47（白色）

# 例子
print("\033[4;31;47m 下划线、红字、白底\033[0m")
print("\033[1;33m 高亮、黄字\033[0m")
print("\033[5;41m 闪烁、红底\033[0m")

office 文件

依赖：

openpyxl
pip install python-docx
pip install python-pptx

from docx import Document

def get_word(filename):
    text_runs = []
    doc = Document(filename)
    for paragraph in doc.paragraphs:
        stripped = paragraph.text.strip()
        if stripped:
            text_runs.append(paragraph.text)
    return '\n'.join(text_runs)

from pptx import Presentation

def get_ppt(filename):
    text_runs = []
    prs = Presentation(filename)
    for slide in prs.slides:
        for shape in slide.shapes:
            # 文本框
            if shape.has_text_frame:
                for paragraph in shape.text_frame.paragraphs:
                    stripped = paragraph.text.strip()
                    if stripped:
                        text_runs.append(paragraph.text)
            # 表格
            if shape.has_table:
                for row in shape.table.rows:
                    text_runs.append(','.join(cell.text for cell in row.cells))

            #         图表
            if shape.has_chart:
                pass

    return '\n'.join(text_runs)

from openpyxl import load_workbook

def get_excel(filename):
    text_runs = []
    wb = load_workbook(filename=filename)
    for s in wb.sheetnames:
        ws = wb[s]
        for row in ws.rows:
            this_row = []
            for cell in row:
                this_row.append(cell.value)
            text_runs.append(this_row)

    return '\n'.join(str(item) for item in text_runs)