From a2ca34a5fa007f51737e3fff7f9bfd695cc223d1 Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Thu, 18 Jan 2024 19:39:43 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=A4=84=E7=90=86HardLinkVideoAttribute?= =?UTF-8?q?=E8=A1=A8=E4=B8=8D=E5=AD=98=E5=9C=A8=E7=9A=84=E5=BC=82=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/hard_link.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/DataBase/hard_link.py b/app/DataBase/hard_link.py index 881f636..f802bc5 100644 --- a/app/DataBase/hard_link.py +++ b/app/DataBase/hard_link.py @@ -169,6 +169,8 @@ class HardLink: video_db_lock.acquire(True) try: self.video_cursor.execute(sql, [md5]) + except sqlite3.OperationalError: + return None except AttributeError: self.init_database() self.video_cursor.execute(sql, [md5]) @@ -218,6 +220,8 @@ class HardLink: # dir0 = 'Thumb' if thumb else 'Image' dat_image = os.path.join(video_root_path, dir2, data_image) return dat_image + else: + return '' def close(self): if self.open_flag: From 62154823cfbe6135301d05f2c9e6c9c8fd74337f Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Thu, 18 Jan 2024 19:47:36 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dmusic=5Fpath=E8=BD=AC?= =?UTF-8?q?=E4=B9=89=E5=AF=BC=E8=87=B4=E7=9A=84HTML=E5=A4=B1=E6=95=88?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/exporter_html.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/DataBase/exporter_html.py b/app/DataBase/exporter_html.py index a657246..2b29f45 100644 --- a/app/DataBase/exporter_html.py +++ b/app/DataBase/exporter_html.py @@ -232,6 +232,7 @@ class HtmlExporter(ExporterBase): is_chatroom = 1 if self.contact.is_chatroom else 0 avatar = self.get_avatar_path(is_send, message) display_name = self.get_display_name(is_send, message) + music_path = escape_js_and_html(music_path) doc.write( f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}', timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}', From 4fa56b888a6e40c3f81c9eab78cae98907c04a9a Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Thu, 18 Jan 2024 23:06:37 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=AF=BC=E5=87=BAdocx?= =?UTF-8?q?=E7=9A=84=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/exporter_docx.py | 90 +++++++++++++++++++++++++++++++++-- app/DataBase/exporter_html.py | 15 ++---- app/DataBase/output.py | 8 +++- app/DataBase/output_pc.py | 6 +-- 4 files changed, 98 insertions(+), 21 deletions(-) diff --git a/app/DataBase/exporter_docx.py b/app/DataBase/exporter_docx.py index 2fe4e51..48ebfdb 100644 --- a/app/DataBase/exporter_docx.py +++ b/app/DataBase/exporter_docx.py @@ -25,6 +25,7 @@ encoded_chars = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x # 创建一个字典,将要删除的字符映射为 None char_mapping = {char: None for char in encoded_chars} + def filter_control_characters(input_string): """ 过滤掉不可打印字符 @@ -39,6 +40,84 @@ def filter_control_characters(input_string): class DocxExporter(ExporterBase): + def merge_docx(self, n): + self.process_num += 1 + conRemark = self.contact.remark + origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{conRemark}" + filename = f"{origin_docx_path}/{conRemark}_{n}.docx" + # print(all_file_path) + doc = docx.Document(filename) + if self.merged_doc_index == [-1, -1]: + self.document.append(doc) + self.merged_doc_index = [n, n] + else: + if n == self.merged_doc_index[0] - 1: + self.document.insert(0, doc) + self.merged_doc_index[0] -= 1 + elif n == self.merged_doc_index[1] + 1: + self.document.append(doc) + self.merged_doc_index[1] += 1 + else: + self.docs.append([doc, n]) + self.docs_set.add(n) + new_docx = [] + new_set = set() + # print(self.docs) + while new_set!=self.docs_set: + self.docs.sort(key=lambda x: x[1]) + for doc_, index in self.docs: + if index == self.merged_doc_index[0] - 1: + self.document.insert(0, doc_) + self.merged_doc_index[0] -= 1 + elif index == self.merged_doc_index[1] + 1: + self.document.append(doc_) + self.merged_doc_index[1] += 1 + else: + new_docx.append([doc_, index]) + new_set.add(index) + self.docs = new_docx + self.docs_set = new_set + os.remove(filename) + if self.process_num == self.child_thread_num: + # self.document.append(self.document) + file = os.path.join(origin_docx_path, f'{conRemark}.docx') + try: + self.document.save(file) + except PermissionError: + file = file[:-5] + f'{time.time()}' + '.docx' + self.document.save(file) + self.okSignal.emit(1) + + def export(self): + self.child_threads = [] + messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) + # 计算每个子列表的长度 + num = 1 + # num = len(messages) // 500 +1 + sublist_length = len(messages) // num + + # 使用列表切片将列表分成n个子列表 + divided_list = [messages[i:i + sublist_length] for i in range(0, len(messages), sublist_length)] + self.child_thread_num = len(divided_list) + self.process_num = 0 + doc = docx.Document() + doc.styles["Normal"].font.name = "Cambria" + doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + self.document = Composer(doc) + self.merged_doc_index = [-1, -1] + self.docs = [] + self.docs_set = set() + # self.document.append(self.document) + for i in range(self.child_thread_num): + child_thread = DocxExporterChildThread(self.contact, type_=self.DOCX, message_types=self.message_types, + time_range=self.time_range, messages=divided_list[i], index=i) + self.child_threads.append(child_thread) + child_thread.okSignal.connect(self.merge_docx) + child_thread.progressSignal.connect(self.progressSignal) + child_thread.start() + + +class DocxExporterChildThread(ExporterBase): def text(self, doc, message): type_ = message[2] str_content = message[7] @@ -313,7 +392,7 @@ class DocxExporter(ExporterBase): def export(self): print(f"【开始导出 DOCX {self.contact.remark}】") origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" - messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) + messages = self.messages Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png")) if self.contact.is_chatroom: for message in messages: @@ -329,6 +408,8 @@ class DocxExporter(ExporterBase): self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png")) self.rangeSignal.emit(len(messages)) + index = 0 + def newdoc(): nonlocal n, doc doc = docx.Document() @@ -347,8 +428,8 @@ class DocxExporter(ExporterBase): for index, message in enumerate(messages): if index % 200 == 0 and index: # doc = document.add_paragraph() - filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx") - doc.save(filename) + # filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx") + # doc.save(filename) newdoc() type_ = message[2] @@ -391,6 +472,7 @@ class DocxExporter(ExporterBase): for index, dx in enumerate(docs[::-1]): print(f"【合并 DOCX {self.contact.remark}】{index + 1}/{len(docs)}") doc.insert(0, dx) + filename = os.path.join(origin_docx_path, f"{self.contact.remark}_{self.index}.docx") try: # document.save(filename) doc.save(filename) @@ -399,4 +481,4 @@ class DocxExporter(ExporterBase): # document.save(filename) doc.save(filename) print(f"【完成导出 DOCX {self.contact.remark}】") - self.okSignal.emit(1) + self.okSignal.emit(self.index) diff --git a/app/DataBase/exporter_html.py b/app/DataBase/exporter_html.py index 2b29f45..f6ff22f 100644 --- a/app/DataBase/exporter_html.py +++ b/app/DataBase/exporter_html.py @@ -127,10 +127,7 @@ class HtmlExporter(ExporterBase): if file_path != "": file_path = './file/' + file_info.get('file_name') doc.write( - f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp} - ,is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}' - ,sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}' - ,app_name: '{file_info.get('app_name')}'}},''' + f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}',sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}',app_name: '{file_info.get('app_name')}'}},''' ) def refermsg(self, doc, message): @@ -234,9 +231,7 @@ class HtmlExporter(ExporterBase): display_name = self.get_display_name(is_send, message) music_path = escape_js_and_html(music_path) doc.write( - f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}', - timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}', - artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},''' + f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},''' ) def share_card(self, doc, message): @@ -266,11 +261,7 @@ class HtmlExporter(ExporterBase): else: app_logo = card_data.get('app_logo') doc.write( - f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}', - timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}', - description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}', - app_name:'{card_data.get('app_name')}' - }},\n''' + f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',app_name:'{card_data.get('app_name')}'}},\n''' ) def export(self): diff --git a/app/DataBase/output.py b/app/DataBase/output.py index 6aa91c7..b35abf4 100644 --- a/app/DataBase/output.py +++ b/app/DataBase/output.py @@ -90,19 +90,23 @@ class ExporterBase(QThread): CONTACT_CSV = 4 TXT = 5 - def __init__(self, contact, type_=DOCX, message_types={},time_range=None, parent=None): + def __init__(self, contact, type_=DOCX, message_types={}, time_range=None, messages=None,index=0, parent=None): super().__init__(parent) self.message_types = message_types # 导出的消息类型 self.contact: Contact = contact # 联系人 self.output_type = type_ # 导出文件类型 self.total_num = 1 # 总的消息数量 self.num = 0 # 当前处理的消息数量 + self.index = index # self.last_timestamp = 0 self.time_range = time_range + self.messages = messages origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" makedirs(origin_docx_path) + def run(self): self.export() + def export(self): raise NotImplementedError("export method must be implemented in subclasses") @@ -166,4 +170,4 @@ class ExporterBase(QThread): return def share_card(self, doc, message): - return \ No newline at end of file + return diff --git a/app/DataBase/output_pc.py b/app/DataBase/output_pc.py index d7dcfed..6905b45 100644 --- a/app/DataBase/output_pc.py +++ b/app/DataBase/output_pc.py @@ -3,7 +3,7 @@ import os import traceback from typing import List -from PyQt5.QtCore import pyqtSignal, QThread +from PyQt5.QtCore import pyqtSignal, QThread, QObject from PyQt5.QtWidgets import QFileDialog from app.DataBase.exporter_csv import CSVExporter @@ -20,7 +20,7 @@ from ..util.image import get_image os.makedirs('./data/聊天记录', exist_ok=True) -class Output(QThread): +class Output(QObject): """ 发送信息线程 """ @@ -222,7 +222,7 @@ class Output(QThread): Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one) Child.start() - def run(self): + def start(self): if self.output_type == self.DOCX: self.to_docx(self.contact, self.message_types) elif self.output_type == self.CSV_ALL: