From a2ca34a5fa007f51737e3fff7f9bfd695cc223d1 Mon Sep 17 00:00:00 2001
From: shuaikangzhou <863909694@qq.com>
Date: Thu, 18 Jan 2024 19:39:43 +0800
Subject: [PATCH 1/3] =?UTF-8?q?=E5=A4=84=E7=90=86HardLinkVideoAttribute?=
 =?UTF-8?q?=E8=A1=A8=E4=B8=8D=E5=AD=98=E5=9C=A8=E7=9A=84=E5=BC=82=E5=B8=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/DataBase/hard_link.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/DataBase/hard_link.py b/app/DataBase/hard_link.py
index 881f636..f802bc5 100644
--- a/app/DataBase/hard_link.py
+++ b/app/DataBase/hard_link.py
@@ -169,6 +169,8 @@ class HardLink:
             video_db_lock.acquire(True)
             try:
                 self.video_cursor.execute(sql, [md5])
+            except sqlite3.OperationalError:
+                return None
             except AttributeError:
                 self.init_database()
                 self.video_cursor.execute(sql, [md5])
@@ -218,6 +220,8 @@ class HardLink:
             # dir0 = 'Thumb' if thumb else 'Image'
             dat_image = os.path.join(video_root_path, dir2, data_image)
             return dat_image
+        else:
+            return ''
 
     def close(self):
         if self.open_flag:

From 62154823cfbe6135301d05f2c9e6c9c8fd74337f Mon Sep 17 00:00:00 2001
From: shuaikangzhou <863909694@qq.com>
Date: Thu, 18 Jan 2024 19:47:36 +0800
Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dmusic=5Fpath=E8=BD=AC?=
 =?UTF-8?q?=E4=B9=89=E5=AF=BC=E8=87=B4=E7=9A=84HTML=E5=A4=B1=E6=95=88?=
 =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/DataBase/exporter_html.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/DataBase/exporter_html.py b/app/DataBase/exporter_html.py
index a657246..2b29f45 100644
--- a/app/DataBase/exporter_html.py
+++ b/app/DataBase/exporter_html.py
@@ -232,6 +232,7 @@ class HtmlExporter(ExporterBase):
             is_chatroom = 1 if self.contact.is_chatroom else 0
             avatar = self.get_avatar_path(is_send, message)
             display_name = self.get_display_name(is_send, message)
+            music_path = escape_js_and_html(music_path)
             doc.write(
                 f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',
                 timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',

From 4fa56b888a6e40c3f81c9eab78cae98907c04a9a Mon Sep 17 00:00:00 2001
From: shuaikangzhou <863909694@qq.com>
Date: Thu, 18 Jan 2024 23:06:37 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=AF=BC=E5=87=BAdocx?=
 =?UTF-8?q?=E7=9A=84=E6=96=B9=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/DataBase/exporter_docx.py | 90 +++++++++++++++++++++++++++++++++--
 app/DataBase/exporter_html.py | 15 ++----
 app/DataBase/output.py        |  8 +++-
 app/DataBase/output_pc.py     |  6 +--
 4 files changed, 98 insertions(+), 21 deletions(-)

diff --git a/app/DataBase/exporter_docx.py b/app/DataBase/exporter_docx.py
index 2fe4e51..48ebfdb 100644
--- a/app/DataBase/exporter_docx.py
+++ b/app/DataBase/exporter_docx.py
@@ -25,6 +25,7 @@ encoded_chars = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x
 # 创建一个字典，将要删除的字符映射为 None
 char_mapping = {char: None for char in encoded_chars}
 
+
 def filter_control_characters(input_string):
     """
     过滤掉不可打印字符
@@ -39,6 +40,84 @@ def filter_control_characters(input_string):
 
 
 class DocxExporter(ExporterBase):
+    def merge_docx(self, n):
+        """Merge part ``n`` written by a child thread into the combined document.
+
+        Parts can arrive in any order. A part is attached only when its index is
+        adjacent to the merged range ``self.merged_doc_index``; otherwise it waits
+        in ``self.docs``. After the last child reports, the result is saved and
+        ``okSignal`` is emitted.
+        """
+        self.process_num += 1
+        conRemark = self.contact.remark
+        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{conRemark}"
+        filename = f"{origin_docx_path}/{conRemark}_{n}.docx"
+        doc = docx.Document(filename)
+        if self.merged_doc_index == [-1, -1]:
+            self.document.append(doc)
+            self.merged_doc_index = [n, n]
+        else:
+            self.docs.append([doc, n])
+        # Attach every waiting part that touches the merged range and repeat until
+        # a pass attaches nothing: attaching one part can make another adjacent.
+        progressed = True
+        while progressed and self.docs:
+            progressed = False
+            remaining = []
+            for doc_, index in sorted(self.docs, key=lambda x: x[1]):
+                if index == self.merged_doc_index[0] - 1:
+                    self.document.insert(0, doc_)
+                    self.merged_doc_index[0] -= 1
+                    progressed = True
+                elif index == self.merged_doc_index[1] + 1:
+                    self.document.append(doc_)
+                    self.merged_doc_index[1] += 1
+                    progressed = True
+                else:
+                    remaining.append([doc_, index])
+            self.docs = remaining
+        self.docs_set = {index for _, index in self.docs}
+        os.remove(filename)
+        if self.process_num == self.child_thread_num:
+            file = os.path.join(origin_docx_path, f'{conRemark}.docx')
+            try:
+                self.document.save(file)
+            except PermissionError:
+                # Fall back to a timestamped file name when the target cannot be written.
+                file = file[:-5] + f'{time.time()}' + '.docx'
+                self.document.save(file)
+            self.okSignal.emit(1)
+
+    def export(self):
+        """Split the messages into chunks and export each chunk in a child thread."""
+        self.child_threads = []
+        messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
+        num = 1  # target number of chunks; each chunk is exported by its own child thread
+        # Clamp to 1: a zero step makes range() raise ValueError for an empty chat.
+        sublist_length = max(1, len(messages) // num)
+        # An empty chat keeps one (empty) chunk so a child still runs and merge_docx can emit okSignal.
+        divided_list = [messages[i:i + sublist_length]
+                        for i in range(0, len(messages), sublist_length)] or [messages]
+        self.child_thread_num = len(divided_list)
+        self.process_num = 0
+        doc = docx.Document()
+        doc.styles["Normal"].font.name = "Cambria"
+        doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
+        self.document = Composer(doc)
+        # [lowest, highest] part index merged so far; [-1, -1] means nothing merged yet.
+        self.merged_doc_index = [-1, -1]
+        self.docs = []
+        self.docs_set = set()
+        for i in range(self.child_thread_num):
+            child_thread = DocxExporterChildThread(self.contact, type_=self.DOCX, message_types=self.message_types,
+                                                   time_range=self.time_range, messages=divided_list[i], index=i)
+            self.child_threads.append(child_thread)
+            child_thread.okSignal.connect(self.merge_docx)
+            child_thread.progressSignal.connect(self.progressSignal)
+            child_thread.start()
+
+
+class DocxExporterChildThread(ExporterBase):
     def text(self, doc, message):
         type_ = message[2]
         str_content = message[7]
@@ -313,7 +392,7 @@ class DocxExporter(ExporterBase):
     def export(self):
         print(f"【开始导出 DOCX {self.contact.remark}】")
         origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
-        messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
+        messages = self.messages
         Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
         if self.contact.is_chatroom:
             for message in messages:
@@ -329,6 +408,8 @@ class DocxExporter(ExporterBase):
             self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
         self.rangeSignal.emit(len(messages))
 
+        index = 0
+
         def newdoc():
             nonlocal n, doc
             doc = docx.Document()
@@ -347,8 +428,8 @@ class DocxExporter(ExporterBase):
         for index, message in enumerate(messages):
             if index % 200 == 0 and index:
                 # doc = document.add_paragraph()
-                filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx")
-                doc.save(filename)
+                # filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx")
+                # doc.save(filename)
                 newdoc()
 
             type_ = message[2]
@@ -391,6 +472,7 @@ class DocxExporter(ExporterBase):
         for index, dx in enumerate(docs[::-1]):
             print(f"【合并 DOCX {self.contact.remark}】{index + 1}/{len(docs)}")
             doc.insert(0, dx)
+        filename = os.path.join(origin_docx_path, f"{self.contact.remark}_{self.index}.docx")
         try:
             # document.save(filename)
             doc.save(filename)
@@ -399,4 +481,4 @@ class DocxExporter(ExporterBase):
             # document.save(filename)
             doc.save(filename)
         print(f"【完成导出 DOCX {self.contact.remark}】")
-        self.okSignal.emit(1)
+        self.okSignal.emit(self.index)
diff --git a/app/DataBase/exporter_html.py b/app/DataBase/exporter_html.py
index 2b29f45..f6ff22f 100644
--- a/app/DataBase/exporter_html.py
+++ b/app/DataBase/exporter_html.py
@@ -127,10 +127,7 @@ class HtmlExporter(ExporterBase):
             if file_path != "":
                 file_path = './file/' + file_info.get('file_name')
             doc.write(
-                f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp}
-                            ,is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}'
-                            ,sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}'
-                            ,app_name: '{file_info.get('app_name')}'}},'''
+                f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}',sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}',app_name: '{file_info.get('app_name')}'}},'''
             )
 
     def refermsg(self, doc, message):
@@ -234,9 +231,7 @@ class HtmlExporter(ExporterBase):
             display_name = self.get_display_name(is_send, message)
             music_path = escape_js_and_html(music_path)
             doc.write(
-                f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',
-                timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',
-                artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
+                f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
             )
 
     def share_card(self, doc, message):
@@ -266,11 +261,7 @@ class HtmlExporter(ExporterBase):
             else:
                 app_logo = card_data.get('app_logo')
         doc.write(
-            f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',
-                    timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',
-                    description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',
-                    app_name:'{card_data.get('app_name')}'
-                    }},\n'''
+            f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',app_name:'{card_data.get('app_name')}'}},\n'''
         )
 
     def export(self):
diff --git a/app/DataBase/output.py b/app/DataBase/output.py
index 6aa91c7..b35abf4 100644
--- a/app/DataBase/output.py
+++ b/app/DataBase/output.py
@@ -90,19 +90,23 @@ class ExporterBase(QThread):
     CONTACT_CSV = 4
     TXT = 5
 
-    def __init__(self, contact, type_=DOCX, message_types={},time_range=None, parent=None):
+    def __init__(self, contact, type_=DOCX, message_types=None, time_range=None, messages=None, index=0, parent=None):
         super().__init__(parent)
-        self.message_types = message_types  # 导出的消息类型
-        self.contact: Contact = contact  # 联系人
-        self.output_type = type_  # 导出文件类型
-        self.total_num = 1  # 总的消息数量
-        self.num = 0  # 当前处理的消息数量
+        self.message_types = {} if message_types is None else message_types  # message types to export; never a shared default dict
+        self.contact: Contact = contact  # contact whose chat is exported
+        self.output_type = type_  # output file type
+        self.total_num = 1  # total number of messages
+        self.num = 0  # number of messages processed so far
+        self.index = index  # chunk index; DocxExporterChildThread uses it in its part file name and okSignal payload
         self.last_timestamp = 0
         self.time_range = time_range
+        self.messages = messages  # pre-fetched messages for this exporter, or None when not supplied
         origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
         makedirs(origin_docx_path)
+
     def run(self):
         self.export()
+
     def export(self):
         raise NotImplementedError("export method must be implemented in subclasses")
 
@@ -166,4 +170,4 @@ class ExporterBase(QThread):
         return
 
     def share_card(self, doc, message):
-        return
\ No newline at end of file
+        return
diff --git a/app/DataBase/output_pc.py b/app/DataBase/output_pc.py
index d7dcfed..6905b45 100644
--- a/app/DataBase/output_pc.py
+++ b/app/DataBase/output_pc.py
@@ -3,7 +3,7 @@ import os
 import traceback
 from typing import List
 
-from PyQt5.QtCore import pyqtSignal, QThread
+from PyQt5.QtCore import pyqtSignal, QThread, QObject
 from PyQt5.QtWidgets import QFileDialog
 
 from app.DataBase.exporter_csv import CSVExporter
@@ -20,7 +20,7 @@ from ..util.image import get_image
 os.makedirs('./data/聊天记录', exist_ok=True)
 
 
-class Output(QThread):
+class Output(QObject):
     """
     发送信息线程
     """
@@ -222,7 +222,7 @@ class Output(QThread):
         Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one)
         Child.start()
 
-    def run(self):
+    def start(self):
         if self.output_type == self.DOCX:
             self.to_docx(self.contact, self.message_types)
         elif self.output_type == self.CSV_ALL: