From 77b272f6af98c3aa4d95f07966be24a2ca5fc136 Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Fri, 12 Jan 2024 20:03:15 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=8D=E5=8F=AF=E6=89=93?= =?UTF-8?q?=E5=8D=B0=E5=AD=97=E7=AC=A6=E5=AF=BC=E8=87=B4=E7=9A=84docx?= =?UTF-8?q?=E5=AF=BC=E5=87=BA=E5=A4=B1=E8=B4=A5#297?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/exporter_docx.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/app/DataBase/exporter_docx.py b/app/DataBase/exporter_docx.py index b1e5272..70ca9de 100644 --- a/app/DataBase/exporter_docx.py +++ b/app/DataBase/exporter_docx.py @@ -18,8 +18,12 @@ from app.person import Me from app.util.compress_content import parser_reply, share_card, music_share from app.util.image import get_image_abs_path from app.util.music import get_music_path -import string +# 要删除的编码字符 +encoded_chars = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f' + +# 创建一个字典,将要删除的字符映射为 None +char_mapping = {char: None for char in encoded_chars} def filter_control_characters(input_string): """ @@ -27,20 +31,12 @@ def filter_control_characters(input_string): @param input_string: @return: """ - # 创建一个包含所有可打印字符的字符串 - printable_chars = set(string.printable) # 过滤掉非可打印字符 - filtered_string = ''.join(char for char in input_string if char in printable_chars) + filtered_string = input_string.translate(char_mapping) return filtered_string -def is_control_char(ch): - '''Whether a control character. - https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python - ''' - return unicodedata.category(ch)[0] == 'C' - class DocxExporter(ExporterBase): def text(self, doc, message): @@ -57,9 +53,12 @@ class DocxExporter(ExporterBase): try: content_cell.paragraphs[0].add_run(str_content) except ValueError: - logger.error(f'非法字符:{str_content}') - str_content = filter_control_characters(str_content) - content_cell.paragraphs[0].add_run(str_content) + try: + str_content = filter_control_characters(str_content) + content_cell.paragraphs[0].add_run(str_content) + except ValueError: + logger.error(f'非法字符:{str_content}') + content_cell.paragraphs[0].add_run('非法字符') content_cell.paragraphs[0].font_size = shared.Inches(0.5) if is_send: p = content_cell.paragraphs[0]