diff --git a/app/DataBase/output_pc.py b/app/DataBase/output_pc.py index 9fa80a9..8084d31 100644 --- a/app/DataBase/output_pc.py +++ b/app/DataBase/output_pc.py @@ -17,6 +17,7 @@ from ..util import path import shutil from ..util.compress_content import parser_reply from ..util.emoji import get_emoji, get_emoji_path +from ..util.image import get_image_path, get_image os.makedirs('./data/聊天记录', exist_ok=True) @@ -74,7 +75,7 @@ class Output(QThread): self.ta_username = contact.wxid if contact else '' self.msg_id = 0 self.output_type = type_ - self.total_num = 0 + self.total_num = 1 self.num = 0 def progress(self, value): @@ -128,23 +129,40 @@ class Output(QThread): self.to_csv_all() elif self.output_type == self.CONTACT_CSV: self.contact_to_csv() - else: + elif self.output_type == self.CSV: self.Child = ChildThread(self.contact, type_=self.output_type, message_types=self.message_types) self.Child.progressSignal.connect(self.progress) self.Child.rangeSignal.connect(self.rangeSignal) + self.Child.okSignal.connect(self.okSignal) + self.Child.start() + elif self.output_type == self.HTML: + self.Child = ChildThread(self.contact, type_=self.output_type, message_types=self.message_types) + self.Child.progressSignal.connect(self.progressSignal) + self.Child.rangeSignal.connect(self.rangeSignal) self.Child.okSignal.connect(self.count_finish_num) self.Child.start() - # 语音消息单独的线程 - self.output_media = OutputMedia(self.contact) - self.output_media.okSingal.connect(self.count_finish_num) - self.output_media.progressSignal.connect(self.progress) - self.output_media.start() - # emoji消息单独的线程 - self.output_emoji = OutputEmoji(self.contact) - self.output_emoji.okSingal.connect(self.count_finish_num) - self.output_emoji.progressSignal.connect(self.progress) - self.output_emoji.start() - self.total_num = 3 + if self.message_types.get(34): + # 语音消息单独的线程 + self.total_num += 1 + self.output_media = OutputMedia(self.contact) + self.output_media.okSingal.connect(self.count_finish_num) + self.output_media.progressSignal.connect(self.progressSignal) + self.output_media.start() + + if self.message_types.get(47): + # emoji消息单独的线程 + self.total_num += 1 + self.output_emoji = OutputEmoji(self.contact) + self.output_emoji.okSingal.connect(self.count_finish_num) + self.output_emoji.progressSignal.connect(self.progressSignal) + self.output_emoji.start() + if self.message_types.get(3): + # emoji消息单独的线程 + self.total_num += 1 + self.output_image = OutputImage(self.contact) + self.output_image.okSingal.connect(self.count_finish_num) + self.output_image.progressSignal.connect(self.progressSignal) + self.output_image.start() def count_finish_num(self, num): self.num += 1 @@ -249,22 +267,14 @@ class ChildThread(QThread): if self.output_type == Output.HTML: str_content = escape_js_and_html(str_content) image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False) - image_thumb_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True) + image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False) if not os.path.exists(os.path.join(MePC().wx_dir, image_path)): - image_path = None - if not os.path.exists(os.path.join(MePC().wx_dir, image_thumb_path)): - image_thumb_path = None - if image_path is None and image_thumb_path is not None: + image_thumb_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True) + if not os.path.exists(os.path.join(MePC().wx_dir, image_thumb_path)): + return image_path = image_thumb_path - if image_path is None and image_thumb_path is None: - return - image_path = path.get_relative_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image') + image_path = get_image_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image') image_path = image_path.replace('/', '\\') - try: - os.utime(origin_docx_path + image_path[1:], (timestamp, timestamp)) - except: - print("网络图片", image_path) - pass image_path = image_path.replace('\\', '/') doc.write( f'''{{ type:{type_}, text: '{image_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},''' @@ -535,8 +545,11 @@ class ChildThread(QThread): type_ = message[2] sub_type = message[3] timestamp = message[5] - if type_ != 34 and type_ != 47: + if (type_ == 3 and self.message_types.get(3)) or (type_ == 34 and self.message_types.get(34)) or (type_ == 47 and self.message_types.get(47)): + pass + else: self.progressSignal.emit(1) + if self.is_5_min(timestamp): str_time = message[8] f.write( @@ -629,10 +642,11 @@ class OutputMedia(QThread): displayname = MePC().name if is_send else self.contact.remark displayname = escape_js_and_html(displayname) modify_audio_metadata(audio_path, displayname) + # os.utime(audio_path, (timestamp, timestamp)) except: logger.error(traceback.format_exc()) - # os.utime(audio_path, (timestamp, timestamp)) - self.progressSignal.emit(1) + finally: + self.progressSignal.emit(1) self.okSingal.emit(34) @@ -653,5 +667,97 @@ class OutputEmoji(QThread): emoji_path = get_emoji(str_content, thumb=True, output_path=origin_docx_path + '/emoji') except: logger.error(traceback.format_exc()) - self.progressSignal.emit(1) - self.okSingal.emit(34) + finally: + self.progressSignal.emit(1) + self.okSingal.emit(47) + + +class OutputImage(QThread): + okSingal = pyqtSignal(int) + progressSignal = pyqtSignal(int) + + def __init__(self, contact): + super().__init__() + self.contact = contact + self.child_thread_num = 2 + self.child_threads = [0]*(self.child_thread_num+1) + self.num = 0 + + def count1(self, num): + self.num += 1 + print('图片导出完成一个') + if self.num == self.child_thread_num: + self.okSingal.emit(47) + print('图片导出完成') + + def run(self): + origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" + messages = msg_db.get_messages_by_type(self.contact.wxid, 3) + for message in messages: + str_content = message[7] + BytesExtra = message[10] + timestamp = message[5] + try: + image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False) + if not os.path.exists(os.path.join(MePC().wx_dir, image_path)): + image_thumb_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True) + if not os.path.exists(os.path.join(MePC().wx_dir, image_thumb_path)): + continue + image_path = image_thumb_path + image_path = get_image(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image') + try: + os.utime(origin_docx_path + image_path[1:], (timestamp, timestamp)) + except: + pass + except: + logger.error(traceback.format_exc()) + finally: + self.progressSignal.emit(1) + self.okSingal.emit(47) + # sublist_length = len(messages) // self.child_thread_num + # index = 0 + # for i in range(0, len(messages), sublist_length): + # child_messages = messages[i:i + sublist_length] + # self.child_threads[index] = OutputImageChild(self.contact, child_messages) + # self.child_threads[index].okSingal.connect(self.count1) + # self.child_threads[index].progressSignal.connect(self.progressSignal) + # self.child_threads[index].start() + # print('开启一个新线程') + # index += 1 + + + + +class OutputImageChild(QThread): + okSingal = pyqtSignal(int) + progressSignal = pyqtSignal(int) + + def __init__(self, contact, messages): + super().__init__() + self.contact = contact + self.messages = messages + + def run(self): + origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" + for message in self.messages: + str_content = message[7] + BytesExtra = message[10] + timestamp = message[5] + try: + image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False) + if not os.path.exists(os.path.join(MePC().wx_dir, image_path)): + image_thumb_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True) + if not os.path.exists(os.path.join(MePC().wx_dir, image_thumb_path)): + continue + image_path = image_thumb_path + image_path = get_image(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image') + try: + os.utime(origin_docx_path + image_path[1:], (timestamp, timestamp)) + except: + pass + except: + logger.error(traceback.format_exc()) + finally: + self.progressSignal.emit(1) + self.okSingal.emit(47) + print('图片子线程完成') diff --git a/app/ui/contact/export_dialog.py b/app/ui/contact/export_dialog.py index 1776103..07f51a2 100644 --- a/app/ui/contact/export_dialog.py +++ b/app/ui/contact/export_dialog.py @@ -72,7 +72,7 @@ class ExportDialog(QDialog): self.setLayout(layout) self.timer = QTimer(self) self.time = 0 - self.total_msg_num = 100 # 总的消息个数 + self.total_msg_num = 99999 # 总的消息个数 self.num = 0 # 当前完成的消息个数 self.timer.timeout.connect(self.update_elapsed_time) diff --git a/app/util/dat2pic.py b/app/util/image.py similarity index 51% rename from app/util/dat2pic.py rename to app/util/image.py index 7a36d9c..8bc3223 100644 --- a/app/util/dat2pic.py +++ b/app/util/image.py @@ -1,5 +1,7 @@ import os +from app.person import MePC + # 图片字节头信息, # [0][1]为jpg头信息, # [2][3]为png头信息, @@ -9,19 +11,13 @@ pic_head = [0xff, 0xd8, 0x89, 0x50, 0x47, 0x49] decode_code = 0 -def get_code(file_path): +def get_code(dat_read): """ 自动判断文件类型,并获取dat文件解密码 :param file_path: dat文件路径 :return: 如果文件为jpg/png/gif格式,则返回解密码,否则返回-1 """ - if os.path.isdir(file_path): - return -1, -1 - # if file_path[-4:] != ".dat": - # return -1, -1 - dat_file = open(file_path, "rb") - dat_read = dat_file.read(2) - # print(dat_read) + head_index = 0 while head_index < len(pic_head): # 使用第一个头信息字节来计算加密码 @@ -30,10 +26,8 @@ def get_code(file_path): idf_code = dat_read[1] ^ code head_index = head_index + 1 if idf_code == pic_head[head_index]: - dat_file.close() return head_index, code head_index = head_index + 1 - dat_file.close() print("not jpg, png, gif") return -1, -1 @@ -46,10 +40,13 @@ def decode_dat(file_path, out_path): """ if not os.path.exists(file_path): return None - file_type, decode_code = get_code(file_path) + with open(file_path, 'rb') as file_in: + data = file_in.read() + file_type, decode_code = get_code(data[:2]) if decode_code == -1: return + filename = os.path.basename(file_path) if file_type == 1: pic_name = os.path.basename(file_path)[:-4] + ".jpg" @@ -62,8 +59,7 @@ def decode_dat(file_path, out_path): file_outpath = os.path.join(out_path, pic_name) if os.path.exists(file_outpath): return file_outpath - with open(file_path, 'rb') as file_in: - data = file_in.read() + # 对数据进行异或加密/解密 with open(file_outpath, 'wb') as file_out: file_out.write(bytes([byte ^ decode_code for byte in data])) @@ -71,6 +67,34 @@ def decode_dat(file_path, out_path): return file_outpath +def decode_dat_path(file_path, out_path): + """ + 解密文件,并生成图片 + :param file_path: dat文件路径 + :return: 无 + """ + if not os.path.exists(file_path): + return None + with open(file_path, 'rb') as file_in: + data = file_in.read(2) + file_type, decode_code = get_code(data) + + if decode_code == -1: + return + + filename = os.path.basename(file_path) + if file_type == 1: + pic_name = os.path.basename(file_path)[:-4] + ".jpg" + elif file_type == 3: + pic_name = filename[:-4] + ".png" + elif file_type == 5: + pic_name = filename[:-4] + ".gif" + else: + pic_name = filename[:-4] + ".jpg" + file_outpath = os.path.join(out_path, pic_name) + return file_outpath + + def find_datfile(dir_path, out_path): """ 获取dat文件目录下所有的文件 @@ -83,6 +107,28 @@ def find_datfile(dir_path, out_path): decode_dat(file_path, out_path) +def get_image(path, base_path) -> str: + if path: + base_path = os.getcwd() + base_path + output_path = decode_dat(os.path.join(MePC().wx_dir, path), base_path) + relative_path = './image/' + os.path.basename( + output_path) if output_path else 'https://www.bing.com/images/search?view=detailV2&ccid=Zww6woP3&id=CCC91337C740656E800E51247E928ACD3052FECF&thid=OIP.Zww6woP3Em49TdSG_lnggAHaEK&mediaurl=https%3a%2f%2fmeekcitizen.files.wordpress.com%2f2018%2f09%2f404.jpg%3fw%3d656&exph=360&expw=640&q=404&simid=608040792714530493&FORM=IRPRST&ck=151E7337A86F1B9C5C5DB08B15B90809&selectedIndex=21&itb=0' + return relative_path + else: + return ':/icons/icons/404.png' + + +def get_image_path(path, base_path) -> str: + if path: + base_path = os.getcwd() + base_path + output_path = decode_dat_path(os.path.join(MePC().wx_dir, path), base_path) + relative_path = './image/' + os.path.basename( + output_path) if output_path else 'https://www.bing.com/images/search?view=detailV2&ccid=Zww6woP3&id=CCC91337C740656E800E51247E928ACD3052FECF&thid=OIP.Zww6woP3Em49TdSG_lnggAHaEK&mediaurl=https%3a%2f%2fmeekcitizen.files.wordpress.com%2f2018%2f09%2f404.jpg%3fw%3d656&exph=360&expw=640&q=404&simid=608040792714530493&FORM=IRPRST&ck=151E7337A86F1B9C5C5DB08B15B90809&selectedIndex=21&itb=0' + return relative_path + else: + return ':/icons/icons/404.png' + + if __name__ == "__main__": path = "E:\86390\Documents\WeChat Files\wxid_27hqbq7vx5hf22\FileStorage\CustomEmotion\\71\\" outpath = "D:\\test" diff --git a/app/util/path.py b/app/util/path.py index ae5718d..1eee4a1 100644 --- a/app/util/path.py +++ b/app/util/path.py @@ -2,7 +2,7 @@ import os import winreg from app.person import MePC -from app.util import dat2pic +from app.util import image os.makedirs('./data/image', exist_ok=True) @@ -11,7 +11,7 @@ def get_abs_path(path, base_path="/data/image"): # return os.path.join(os.getcwd(), 'app/data/icons/404.png') if path: base_path = os.getcwd() + base_path - output_path = dat2pic.decode_dat(os.path.join(MePC().wx_dir, path), base_path) + output_path = image.decode_dat(os.path.join(MePC().wx_dir, path), base_path) return output_path if output_path else ':/icons/icons/404.png' else: return ':/icons/icons/404.png' @@ -20,7 +20,7 @@ def get_abs_path(path, base_path="/data/image"): def get_relative_path(path, base_path, type_='image'): if path: base_path = os.getcwd() + base_path - output_path = dat2pic.decode_dat(os.path.join(MePC().wx_dir, path), base_path) + output_path = image.decode_dat(os.path.join(MePC().wx_dir, path), base_path) relative_path = './image/' + os.path.basename( output_path) if output_path else 'https://www.bing.com/images/search?view=detailV2&ccid=Zww6woP3&id=CCC91337C740656E800E51247E928ACD3052FECF&thid=OIP.Zww6woP3Em49TdSG_lnggAHaEK&mediaurl=https%3a%2f%2fmeekcitizen.files.wordpress.com%2f2018%2f09%2f404.jpg%3fw%3d656&exph=360&expw=640&q=404&simid=608040792714530493&FORM=IRPRST&ck=151E7337A86F1B9C5C5DB08B15B90809&selectedIndex=21&itb=0' return relative_path