Initial commit
This commit is contained in:
189
generate_table.py
Normal file
189
generate_table.py
Normal file
@@ -0,0 +1,189 @@
|
||||
|
||||
#########################################################
## @file    : generate_table.py
## @desc    : Generate HTML tables from JSON data and convert them to images
##            (从json数据,生成html表格,转化为图片)
## @create  : 2025/6/24
## @author  : Chengan,doubao AI
## @email   : douboer@gmail.com
#########################################################
|
||||
|
||||
import pandas as pd
|
||||
from pretty_html_table import build_table
|
||||
from PIL import Image
|
||||
import imgkit
|
||||
import os
|
||||
from logger_utils import CommonLogger
|
||||
from functools import partial
|
||||
|
||||
|
||||
class GenerateTable:
    """Lay styled HTML tables over a background image and render each page to PNG.

    Tables are built with ``pretty_html_table.build_table``, packed onto
    fixed-size pages using an estimated height per table, written to
    ``temp/example_page_<n>.html`` and converted to
    ``temp/example_page_<n>.png`` with ``imgkit``.
    """

    # Markers used to locate and unwrap the table markup produced by build_table.
    _TABLE_OPEN_TAG = '<table class="dataframe">'
    _TABLE_CLOSE_TAG = '</table>'

    def __init__(self, top_margin=100, bottom_margin=100, side_margin=100, table_spacing=100, offset=0):
        """Store the layout settings and derive the working paths.

        Args:
            top_margin: Distance in pixels between the top of the image and
                the first table.
            bottom_margin: Distance in pixels kept free at the bottom of the
                image.
            side_margin: Left and right margin of every table, in pixels.
            table_spacing: Vertical spacing between adjacent tables, in pixels.
            offset: Fine-tuning value in pixels subtracted from the usable
                page height when deciding whether a table still fits.
        """
        # Logger shared by all methods of this instance.
        self.logger = CommonLogger(log_file='temp/example_generate_table.log').get_logger()

        self.top_margin = top_margin
        self.bottom_margin = bottom_margin
        self.side_margin = side_margin
        self.table_spacing = table_spacing
        self.offset = offset

        # Output image size is fixed at 1440 x 1600 pixels.
        self.img_width = 1440
        self.img_height = 1600

        # Options handed to imgkit for every rendered page.
        self.options = {
            "enable-local-file-access": "",  # make sure local files (the background) can be read
            "encoding": "UTF-8",
            "width": self.img_width,
            "height": self.img_height,
            'crop-h': self.img_height,  # keep only the first img_height pixels, crop the rest
        }

        # All paths are resolved against the directory the script is run from.
        self.current_dir = os.getcwd()
        self.background_original_path = os.path.join(self.current_dir, 'backgrounds', 'IMG_5790.JPG')
        self.background_resized_path = os.path.join(self.current_dir, 'backgrounds', 'resized_IMG_5790.JPG')
        self.temp_dir = os.path.join(self.current_dir, 'temp')

        # Height of one table cell in pixels; also used to estimate table height.
        self.cell_height = 100

    def resize_background_image(self):
        """Resize the background image to the page size and save the copy.

        Returns:
            True when the resized image was written to
            ``self.background_resized_path``; False when the source file is
            missing or any other error occurred (the error is logged).
        """
        try:
            # The context manager closes the source file handle even if
            # resizing or saving fails.
            with Image.open(self.background_original_path) as image:
                resized_image = image.resize((self.img_width, self.img_height))
                resized_image.save(self.background_resized_path)
            self.logger.info("底图调整大小并保存成功。")
            return True
        except FileNotFoundError:
            self.logger.error("底图文件未找到,请检查文件路径。")
            return False
        except Exception as e:
            self.logger.error(f"调整底图大小时出现未知错误: {e}")
            return False

    def extract_table_content(self, html_str):
        """Return the ``<table class="dataframe">...</table>`` part of *html_str*.

        Surrounding whitespace and one enclosing ``<p>...</p>`` wrapper are
        removed first.

        Args:
            html_str: HTML text expected to contain a dataframe table.

        Returns:
            The substring from the opening tag up to and including the first
            closing ``</table>`` that follows it, or None when either tag is
            missing or an error occurred (the error is logged).
        """
        try:
            html_str = html_str.strip()

            # Unwrap the table when it is enclosed in a <p> element.
            if html_str.startswith('<p>') and html_str.endswith('</p>'):
                html_str = html_str[3:-4].strip()

            start_idx = html_str.find(self._TABLE_OPEN_TAG)
            if start_idx == -1:
                return None

            # Search for the closing tag only after the opening tag, so a stray
            # '</table>' earlier in the string cannot yield a bogus slice.
            end_idx = html_str.find(self._TABLE_CLOSE_TAG, start_idx + len(self._TABLE_OPEN_TAG))
            if end_idx == -1:
                return None

            return html_str[start_idx:end_idx + len(self._TABLE_CLOSE_TAG)]
        except Exception as e:
            self.logger.error(f"提取表格内容时出现错误: {e}")
            return None

    def _build_sample_tables(self):
        """Build the list of demo tables as HTML strings, one colour theme each."""
        df = pd.DataFrame(data={
            'ID': [1, 2, 3, 4],
            'First Name': ['Flore', 'Grom', 'Truip', 'Ftro'],
            'Last Name': ['Ju', 'Re', 'Ve', 'Cy'],
            'Age': [23, 45, 67, 12],
            'Place of Birth': ['France', 'USA', 'China', 'India'],
            'Date of Birth': ['1996-10-04', '1974-10-10', '1952-04-07', '2007-10-06']
        })

        df2 = pd.DataFrame(data={
            '维度': ['项目A/Project A', '项目B/Project B', '项目C/Project C', '评估标准/Evaluation'],
            '进度 Progress': ['按时完成90%的关键里程碑,延期完成10%', '提前两周完成所有阶段', '频繁延期,关键任务延误超过一个月', '按时完 成率≥90%为优秀,80%-89%为良好,<80%为需改进'],
            '质量 Quality': ['客户满意度调查得分92分', '产品缺陷率低至0.5%', '多次因质量问题返工', '客户满意度≥90%,产品缺陷率≤1%为高质>量'],
            '成本 Cost': ['超支5%,在可接受范围内', '预算内完成,成本控制出色,节约3%', '成本超支20%', '成本控制在预算+5%内为优秀,+5%-10%>为良好,>10%为需改进'],
            '风险 Risk': ['无重大损失', '风险识别全面', '风险管理不足', '风险识别率≥90%,有效缓解率≥80%为优秀'],
            '沟通 Communication': ['定期召开项目会议,沟通记录详尽,反馈及时', '建立了高效的沟通机制使用管理工具促进协作', '沟通不畅,信息 延误导致决策失误', '沟通满意度≥90%,信息传递准确率≥95%为高效沟通']
        })

        # Every table shares the same cell geometry and font; only the data
        # frame and the colour theme vary.
        build_table_default = partial(
            build_table,
            height=self.cell_height,
            text_valign='middle',
            padding="5px 10px 5px 20px",
            font_family='雅痞-简',
            font_size='25px',
        )

        themed_frames = [
            (df, 'blue_dark'),
            (df2, 'red_dark'),
            (df, 'grey_light'),
            (df2, 'grey_dark'),
            (df, 'orange_light'),
            (df2, 'orange_dark'),
            (df, 'yellow_light'),
            (df2, 'yellow_dark'),
            (df, 'green_light'),
            (df2, 'green_dark'),
            (df, 'red_light'),
            (df2, 'red_dark'),
            (df, 'red_dark'),
        ]
        return [build_table_default(frame, theme) for frame, theme in themed_frames]

    def _write_page(self, page_num, html_content):
        """Write one page's HTML into the temp directory and render it to PNG."""
        html_path = os.path.join(self.temp_dir, f'example_page_{page_num}.html')
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        img_path = os.path.join(self.temp_dir, f'example_page_{page_num}.png')
        imgkit.from_file(html_path, img_path, options=self.options)

        self.logger.info("第 %d 页 HTML 文件和图片生成成功。", page_num)

    def generate_table_images(self):
        """Generate the HTML tables and convert them to page images.

        The background is resized first; nothing is produced when that fails.
        Tables are then placed onto pages one after another. A new page is
        started as soon as the next table's estimated height would no longer
        fit. A table that does not fit even on an empty page is still placed
        on a page of its own so that the loop always makes progress. Pages
        that end up with no table at all are not written.

        Returns:
            None. Any exception is logged and swallowed.
        """
        try:
            if not self.resize_background_image():
                return

            tables = self._build_sample_tables()

            # Make sure the temporary output directory exists.
            os.makedirs(self.temp_dir, exist_ok=True)

            # The HTML files live in temp_dir, so reference the background
            # relative to it; use forward slashes so the CSS url() stays valid
            # on platforms whose path separator is a backslash.
            relative_background_path = os.path.relpath(
                self.background_resized_path, self.temp_dir
            ).replace(os.sep, '/')

            table_width = self.img_width - 2 * self.side_margin
            style = (
                f"margin-top: {self.table_spacing}px; "
                f"margin-left: {self.side_margin}px; "
                f"margin-right: {self.side_margin}px; "
                f"margin-bottom: {self.table_spacing}px; "
                f"width: {table_width}px;"
            )
            body_style = (
                f"background-image: url('{relative_background_path}'); "
                f"background-size: {self.img_width}px {self.img_height}px;"
            )
            page_header = (
                '<html>\n<head>\n<meta charset="UTF-8">\n</head>\n'
                f'<body style="{body_style}">'
            )
            # Lowest vertical position a table (plus bottom margin) may reach.
            page_limit = self.img_height - self.offset

            page_num = 1
            current_index = 0  # index of the next table to place

            while current_index < len(tables):
                current_top = self.top_margin
                page_parts = [page_header]
                placed = 0  # number of tables put on the current page

                while current_index < len(tables):
                    table_content = self.extract_table_content(tables[current_index])
                    if not table_content:
                        self.logger.warning("无法从表格字符串中提取有效表格内容,跳过该表格。")
                        current_index += 1
                        continue

                    # Estimate the height from the number of pieces obtained by
                    # splitting on the literal '<tr>'.
                    # NOTE(review): rows rendered with attributes (e.g.
                    # '<tr style=...>') are not matched by this split - confirm
                    # against the markup build_table actually emits.
                    table_height_est = len(table_content.split('<tr>')) * self.cell_height

                    if current_top + table_height_est + self.bottom_margin > page_limit:
                        if placed:
                            # Page is full; the table goes onto the next page.
                            break
                        # The table does not fit even on an empty page. Place
                        # it anyway: breaking here would leave current_index
                        # unchanged and loop forever writing empty pages.
                        self.logger.warning(
                            "第 %d 页:表格估算高度 %d 像素超出单页可用高度,仍将其单独放置(超出部分会被裁剪)。",
                            page_num, table_height_est,
                        )

                    # Computed outside the f-string: a backslash inside an
                    # f-string replacement field is a SyntaxError before
                    # Python 3.12.
                    inner_html = table_content.split(self._TABLE_OPEN_TAG)[1].split(self._TABLE_CLOSE_TAG)[0]
                    page_parts.append(f'<table style="{style}">{inner_html}</table>')

                    current_top += table_height_est + self.table_spacing
                    current_index += 1
                    placed += 1

                if not placed:
                    # Every remaining table was skipped; do not emit a blank page.
                    break

                page_parts.append("</body></html>")
                self._write_page(page_num, "".join(page_parts))
                page_num += 1

        except Exception as e:
            self.logger.error(f"生成表格图片时出现未知错误: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: create a generator with the default layout
    # arguments and render all pages.
    GenerateTable().generate_table_images()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user