Files
xhsautopublisher/generate_table.py
2025-09-05 17:20:14 +08:00

190 lines
9.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#########################################################
## @file : generate_table.py
## @desc : 从json数据生成html表格转化为图片
## @create : 2025/6/24
## @author : Chengandoubao AI
## @email : douboer@gmail.com
#########################################################
import pandas as pd
from pretty_html_table import build_table
from PIL import Image
import imgkit
import os
from logger_utils import CommonLogger
from functools import partial
class GenerateTable:
    """Lay out HTML tables on a fixed-size background and render PNG pages.

    Tables are produced with ``build_table`` (pretty_html_table), stacked
    vertically on as many pages as needed, written to ``temp/`` as HTML and
    converted to images with ``imgkit.from_file``.
    """

    # Literal tags used to locate the table inside the generated HTML.
    _TABLE_OPEN_TAG = '<table class="dataframe">'
    _TABLE_CLOSE_TAG = '</table>'

    def __init__(self, top_margin=100, bottom_margin=100, side_margin=100, table_spacing=100, offset=0):
        """Store layout settings and derive the working paths.

        Args:
            top_margin: Pixels assumed above the first table when estimating
                how many tables fit on a page.
            bottom_margin: Pixels kept free at the bottom of every page.
            side_margin: Left/right margin applied to each table, in pixels.
            table_spacing: Vertical margin applied to each table, in pixels.
            offset: Extra pixels subtracted from the usable page height.
        """
        # Initialise the logger.
        self.logger = CommonLogger(log_file='temp/example_generate_table.log').get_logger()
        self.top_margin = top_margin
        self.bottom_margin = bottom_margin
        self.side_margin = side_margin
        self.table_spacing = table_spacing
        self.offset = offset
        self.img_width = 1440  # output image width in pixels (fixed)
        self.img_height = 1600  # output image height in pixels (fixed)
        self.options = {
            "enable-local-file-access": "",  # make sure local file access is enabled
            "encoding": "UTF-8",
            "width": self.img_width,
            "height": self.img_height,
            'crop-h': self.img_height,  # keep only the first img_height pixels, crop the rest
        }
        # All paths are resolved against the directory the process was started in.
        self.current_dir = os.getcwd()
        self.background_original_path = os.path.join(self.current_dir, 'backgrounds', 'IMG_5790.JPG')
        self.background_resized_path = os.path.join(self.current_dir, 'backgrounds', 'resized_IMG_5790.JPG')
        self.temp_dir = os.path.join(self.current_dir, 'temp')
        self.cell_height = 100  # per-row height in pixels, also used to estimate table height

    def resize_background_image(self):
        """Resize the background to the page size and save the resized copy.

        Returns:
            True on success, False if the source image is missing or any
            other error occurs (the error is logged, not raised).
        """
        try:
            # The context manager closes the underlying file handle, which the
            # bare Image.open() call would otherwise leave open.
            with Image.open(self.background_original_path) as image:
                resized_image = image.resize((self.img_width, self.img_height))
                resized_image.save(self.background_resized_path)
            self.logger.info("底图调整大小并保存成功。")
            return True
        except FileNotFoundError:
            self.logger.error("底图文件未找到,请检查文件路径。")
            return False
        except Exception as e:
            self.logger.error(f"调整底图大小时出现未知错误: {e}")
            return False

    def extract_table_content(self, html_str):
        """Return the ``<table class="dataframe">...</table>`` span of *html_str*.

        A surrounding ``<p>...</p>`` wrapper is tolerated.

        Returns:
            The substring from the opening tag through the first closing tag
            that follows it, or None when no such span exists or an error
            occurs (the error is logged).
        """
        try:
            html_str = html_str.strip()
            # Drop an enclosing <p>...</p> wrapper, if present.
            if html_str.startswith('<p>') and html_str.endswith('</p>'):
                html_str = html_str[3:-4].strip()
            start_idx = html_str.find(self._TABLE_OPEN_TAG)
            if start_idx == -1:
                return None
            # Search for the closing tag only *after* the opening tag so that a
            # stray earlier </table> cannot yield an empty or inverted slice.
            close_idx = html_str.find(self._TABLE_CLOSE_TAG, start_idx)
            if close_idx == -1:
                return None
            return html_str[start_idx:close_idx + len(self._TABLE_CLOSE_TAG)]
        except Exception as e:
            self.logger.error(f"提取表格内容时出现错误: {e}")
            return None

    def _build_sample_tables(self):
        """Build the hard-coded sample tables as HTML strings, in page order."""
        df = pd.DataFrame(data={
            'ID': [1, 2, 3, 4],
            'First Name': ['Flore', 'Grom', 'Truip', 'Ftro'],
            'Last Name': ['Ju', 'Re', 'Ve', 'Cy'],
            'Age': [23, 45, 67, 12],
            'Place of Birth': ['France', 'USA', 'China', 'India'],
            'Date of Birth': ['1996-10-04', '1974-10-10', '1952-04-07', '2007-10-06'],
        })
        # NOTE(review): a few of the strings below contain what look like stray
        # spaces / '>' characters; they are kept exactly as found - confirm
        # whether they are intentional.
        df2 = pd.DataFrame(data={
            '维度': ['项目A/Project A', '项目B/Project B', '项目C/Project C', '评估标准/Evaluation'],
            '进度 Progress': ['按时完成90%的关键里程碑,延期完成10%', '提前两周完成所有阶段', '频繁延期,关键任务延误超过一个月', '按时完 成率≥90%为优秀,80%-89%为良好,<80%为需改进'],
            '质量 Quality': ['客户满意度调查得分92分', '产品缺陷率低至0.5%', '多次因质量问题返工', '客户满意度≥90%,产品缺陷率≤1%为高质>量'],
            '成本 Cost': ['超支5%,在可接受范围内', '预算内完成,成本控制出色,节约3%', '成本超支20%', '成本控制在预算+5%内为优秀,+5%-10%>为良好,>10%为需改进'],
            '风险 Risk': ['无重大损失', '风险识别全面', '风险管理不足', '风险识别率≥90%,有效缓解率≥80%为优秀'],
            '沟通 Communication': ['定期召开项目会议,沟通记录详尽,反馈及时', '建立了高效的沟通机制使用管理工具促进协作', '沟通不畅,信息 延误导致决策失误', '沟通满意度≥90%,信息传递准确率≥95%为高效沟通'],
        })
        build_table_default = partial(
            build_table,
            height=self.cell_height,
            text_valign='middle',
            padding="5px 10px 5px 20px",
            font_family='雅痞-简',
            font_size='25px',
        )
        # (frame, colour theme) pairs in the order they should appear.
        specs = [
            (df, 'blue_dark'),
            (df2, 'red_dark'),
            (df, 'grey_light'),
            (df2, 'grey_dark'),
            (df, 'orange_light'),
            (df2, 'orange_dark'),
            (df, 'yellow_light'),
            (df2, 'yellow_dark'),
            (df, 'green_light'),
            (df2, 'green_dark'),
            (df, 'red_light'),
            (df2, 'red_dark'),
            (df, 'red_dark'),
        ]
        return [build_table_default(frame, color) for frame, color in specs]

    def _estimate_table_height(self, table_content):
        """Estimate a table's pixel height from its ``<tr>`` count.

        The number of pieces obtained by splitting on the literal ``<tr>``
        (occurrences + 1) is multiplied by ``cell_height``.
        """
        return len(table_content.split('<tr>')) * self.cell_height

    def _paginate(self, table_contents):
        """Group table fragments into pages that fit the usable page height.

        A table that is too tall even for an empty page is still placed on a
        page of its own. Previously such a table was never consumed, so the
        page loop emitted empty pages forever.
        """
        usable_bottom = self.img_height - self.offset - self.bottom_margin
        pages = []
        page = []
        current_top = self.top_margin
        for content in table_contents:
            height = self._estimate_table_height(content)
            if page and current_top + height > usable_bottom:
                # Does not fit below the tables already placed: start a new page.
                pages.append(page)
                page = []
                current_top = self.top_margin
            if not page and current_top + height > usable_bottom:
                # Oversized table: warn, but place it anyway to guarantee progress.
                self.logger.warning(
                    "表格估算高度 %d 超出单页可用高度,仍单独放置于一页,超出部分将被裁剪。",
                    height,
                )
            page.append(content)
            current_top += height + self.table_spacing
        if page:
            pages.append(page)
        return pages

    def _render_page(self, page_num, page_tables, relative_background_path):
        """Write one page of tables to HTML and convert it to a PNG image."""
        table_width = self.img_width - 2 * self.side_margin
        style = (
            f"margin-top: {self.table_spacing}px; margin-left: {self.side_margin}px; "
            f"margin-right: {self.side_margin}px; margin-bottom: {self.table_spacing}px; "
            f"width: {table_width}px;"
        )
        parts = [
            '<html>\n'
            '<head>\n'
            '<meta charset="UTF-8">\n'
            '</head>\n'
            f"""<body style="background-image: url('{relative_background_path}'); """
            f"""background-size: {self.img_width}px {self.img_height}px;">"""
        ]
        for table_content in page_tables:
            # Computed outside the f-string: backslash escapes inside an
            # f-string expression are a SyntaxError before Python 3.12.
            inner = table_content.split(self._TABLE_OPEN_TAG, 1)[1].split(self._TABLE_CLOSE_TAG, 1)[0]
            parts.append(f'<table style="{style}">{inner}</table>')
        parts.append("</body></html>")

        html_path = os.path.join(self.temp_dir, f'example_page_{page_num}.html')
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write("".join(parts))
        img_path = os.path.join(self.temp_dir, f'example_page_{page_num}.png')
        imgkit.from_file(html_path, img_path, options=self.options)
        self.logger.info("%d 页 HTML 文件和图片生成成功。", page_num)

    def generate_table_images(self):
        """Generate the sample tables and render them to paginated PNG images.

        Output files are ``temp/example_page_<n>.html`` and ``.png``. Returns
        None; every failure is logged rather than raised.
        """
        try:
            if not self.resize_background_image():
                return
            tables = self._build_sample_tables()

            # Make sure the temporary output folder exists.
            os.makedirs(self.temp_dir, exist_ok=True)

            # The HTML lives in temp_dir, so reference the background relative
            # to it. CSS url() needs forward slashes; a backslash is an escape.
            relative_background_path = os.path.relpath(
                self.background_resized_path, self.temp_dir
            ).replace(os.sep, '/')

            table_contents = []
            for table in tables:
                table_content = self.extract_table_content(table)
                if not table_content:
                    self.logger.warning("无法从表格字符串中提取有效表格内容,跳过该表格。")
                    continue
                table_contents.append(table_content)

            for page_num, page_tables in enumerate(self._paginate(table_contents), start=1):
                self._render_page(page_num, page_tables, relative_background_path)
        except Exception as e:
            self.logger.error(f"生成表格图片时出现未知错误: {e}")
if __name__ == "__main__":
    # Script entry point: build a generator with the default layout settings
    # and render all pages.
    GenerateTable().generate_table_images()