diff --git a/README.md b/README.md index dcb60a3..356c684 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# 豆瓣滑块验证码检测器 +# 豆瓣滑块验证码检测器(v1.0.0) 基于 sharp 图像处理库实现的滑块验证码缺口精确识别工具。 @@ -6,9 +6,9 @@ - ✅ 自动检测图片中的滑块缺口位置 - ✅ 支持多滑块检测(一张图片中检测多个滑块) -- ✅ 使用多阈值策略提高检测准确率 +- ✅ 多策略候选区域搜索与自适应评分体系 - ✅ 自动在图片上绘制蓝色边框标注检测结果 -- ✅ 支持人工标注对比验证 +- ✅ 支持基于 ground-truth 的准确率评估 ## 安装依赖 @@ -18,7 +18,7 @@ npm install ## 使用方法 -### 运行检测 +### 运行检测(默认数据集) ```bash npm run detect @@ -29,6 +29,14 @@ npm run detect 2. 处理 `images/douban` 目录中的所有图片 3. 将检测结果(带蓝色边框)输出到 `images/output` 目录 +### 自定义目录 + +```bash +npm run detect -- --pic-dir=images/custom +``` + +> `--pic-dir` 为 `README` 所在目录的相对路径,检测结果仍然输出到 `images/output`。 + ### 目录结构 ``` @@ -44,22 +52,11 @@ images/ ### 核心思路 -1. **暗色区域检测**:滑块缺口通常为暗色(阴影)区域 -2. **多阈值策略**:使用 75、90、105 三个亮度阈值进行检测,提高鲁棒性 -3. **形态学处理**:使用闭运算(先膨胀后腐蚀)去除噪点 -4. **连通区域分析**:使用洪水填充算法查找所有暗色连通区域 -5. **特征筛选**: - - 尺寸范围:50-95 像素(宽度和高度) - - 宽高比:0.8-1.25(接近正方形) - - 位置:图片中部区域(15%-75%高度) - - 密度:像素填充率 > 55% - -### 检测结果 - -当前性能(在 9 张测试图片上): -- **验证准确率**:55.6%(5/9) -- **检测完成率**:66.7%(6/9) -- 支持多滑块检测(滑块-1.png 检测到 2 个滑块) +1. **多阈值暗/亮检测**:从增强对比度后的图像中提取亮/暗连通域 +2. **Sobel 边缘候选**:基于边缘图的形态学操作补充候选框 +3. **颜色量化与 LAB 色差扫描**:强化低对比度场景下的候选覆盖 +4. **候选融合**:利用 IoU 去重并按颜色一致性、内部边缘密度、梯度平滑度打分 +5. **自学习滑块扩展**:在首个高置信度框的基础上,使用模板匹配寻找第二个滑块 ## 开发 @@ -71,15 +68,13 @@ npm run build ### 调试 -查看未检测到的图片特征: -```bash -npx ts-node --transpile-only src/debug-threshold.ts -``` +核心逻辑已经拆分为模块化的检测管线: -分析红框标注信息: -```bash -npx ts-node --transpile-only src/analyze.ts -``` +- `src/detection/candidate-search.ts`:多策略候选区域查找与评分 +- `src/utils/image.ts`:图像形态学、边缘工具函数 +- `src/detector.ts`:检测器主体、框体精炼与多滑块协同策略 +- `src/cli.ts`:命令行入口,负责加载 ground-truth 与批量处理 +- `src/validator.ts`:IoU/容差匹配与统计分析 ## 技术栈 @@ -87,6 +82,10 @@ npx ts-node --transpile-only src/analyze.ts - Sharp (图像处理) - Node.js +## 版本历史 + +- v1.0.0:引入模块化检测管线、CLI 工具、ground-truth 验证与多策略候选融合。 + ## 优化方向 1. 
提高对低对比度图片的检测能力 diff --git a/ground-truth.json b/ground-truth.json index 8f49fcc..79a9d3c 100644 --- a/ground-truth.json +++ b/ground-truth.json @@ -14,115 +14,381 @@ } ], "滑块-2.png": [ - { - "x": 454, - "y": 244, - "width": 90, - "height": 92 - }, { "x": 125, "y": 245, "width": 89, "height": 91 + }, + { + "x": 454, + "y": 244, + "width": 90, + "height": 92 } ], "滑块-3.png": [ - { - "x": 576, - "y": 237, - "width": 87, - "height": 88 - }, { "x": 122, "y": 238, "width": 86, "height": 87 + }, + { + "x": 576, + "y": 237, + "width": 87, + "height": 88 } ], "滑块-4.png": [ - { - "x": 488, - "y": 329, - "width": 91, - "height": 91 - }, { "x": 120, "y": 330, "width": 90, "height": 90 + }, + { + "x": 488, + "y": 329, + "width": 91, + "height": 91 } ], "滑块-5.png": [ - { - "x": 404, - "y": 443, - "width": 91, - "height": 89 - }, { "x": 119, "y": 444, "width": 90, "height": 88 + }, + { + "x": 404, + "y": 443, + "width": 91, + "height": 89 } ], "滑块-6.png": [ - { - "x": 574, - "y": 318, - "width": 92, - "height": 92 - }, { "x": 116, "y": 319, "width": 91, "height": 91 + }, + { + "x": 574, + "y": 318, + "width": 92, + "height": 92 } ], "滑块-7.png": [ - { - "x": 349, - "y": 177, - "width": 101, - "height": 166 - }, { "x": 119, "y": 255, "width": 88, "height": 88 + }, + { + "x": 349, + "y": 177, + "width": 101, + "height": 166 } ], "滑块-8.png": [ - { - "x": 434, - "y": 243, - "width": 93, - "height": 93 - }, { "x": 120, "y": 244, "width": 92, "height": 92 + }, + { + "x": 434, + "y": 243, + "width": 93, + "height": 93 } ], "滑块.png": [ - { - "x": 375, - "y": 407, - "width": 88, - "height": 89 - }, { "x": 131, "y": 408, "width": 87, "height": 88 + }, + { + "x": 375, + "y": 407, + "width": 88, + "height": 89 + } + ], + "iShot_2025-10-25_16.53.21.png": [ + { + "x": 119, + "y": 344, + "width": 91, + "height": 92 + }, + { + "x": 575, + "y": 342, + "width": 93, + "height": 94 + } + ], + "iShot_2025-10-25_16.53.40.png": [ + { + "x": 108, + "y": 353, + "width": 94, + "height": 91 + }, + { + "x": 
365, + "y": 353, + "width": 95, + "height": 92 + } + ], + "iShot_2025-10-25_16.53.48.png": [ + { + "x": 122, + "y": 256, + "width": 90, + "height": 89 + }, + { + "x": 379, + "y": 256, + "width": 91, + "height": 90 + } + ], + "iShot_2025-10-25_16.53.57.png": [ + { + "x": 110, + "y": 282, + "width": 90, + "height": 88 + }, + { + "x": 380, + "y": 282, + "width": 90, + "height": 89 + } + ], + "iShot_2025-10-25_16.54.08.png": [ + { + "x": 119, + "y": 306, + "width": 93, + "height": 93 + }, + { + "x": 386, + "y": 306, + "width": 93, + "height": 94 + } + ], + "iShot_2025-10-25_16.54.15.png": [ + { + "x": 118, + "y": 360, + "width": 90, + "height": 88 + }, + { + "x": 386, + "y": 363, + "width": 91, + "height": 89 + } + ], + "iShot_2025-10-25_16.54.25.png": [ + { + "x": 121, + "y": 420, + "width": 88, + "height": 87 + }, + { + "x": 313, + "y": 420, + "width": 90, + "height": 88 + } + ], + "iShot_2025-10-25_16.54.32.png": [ + { + "x": 113, + "y": 292, + "width": 88, + "height": 88 + }, + { + "x": 346, + "y": 292, + "width": 88, + "height": 88 + } + ], + "iShot_2025-10-25_16.54.41.png": [ + { + "x": 118, + "y": 388, + "width": 88, + "height": 88 + }, + { + "x": 541, + "y": 388, + "width": 89, + "height": 89 + } + ], + "iShot_2025-10-25_16.54.54.png": [ + { + "x": 98, + "y": 334, + "width": 90, + "height": 88 + }, + { + "x": 310, + "y": 334, + "width": 92, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.02.png": [ + { + "x": 119, + "y": 349, + "width": 90, + "height": 88 + }, + { + "x": 401, + "y": 349, + "width": 92, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.09.png": [ + { + "x": 100, + "y": 351, + "width": 90, + "height": 88 + }, + { + "x": 382, + "y": 351, + "width": 92, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.14.png": [ + { + "x": 119, + "y": 365, + "width": 90, + "height": 88 + }, + { + "x": 400, + "y": 365, + "width": 91, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.21.png": [ + { + "x": 110, + "y": 220, + "width": 92, + "height": 89 + }, + { + 
"x": 519, + "y": 220, + "width": 90, + "height": 88 + } + ], + "iShot_2025-10-25_16.55.29.png": [ + { + "x": 114, + "y": 309, + "width": 90, + "height": 88 + }, + { + "x": 544, + "y": 309, + "width": 90, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.37.png": [ + { + "x": 107, + "y": 427, + "width": 87, + "height": 88 + }, + { + "x": 542, + "y": 427, + "width": 88, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.46.png": [ + { + "x": 117, + "y": 227, + "width": 88, + "height": 89 + }, + { + "x": 550, + "y": 227, + "width": 89, + "height": 90 + } + ], + "iShot_2025-10-25_16.55.52.png": [ + { + "x": 112, + "y": 314, + "width": 89, + "height": 91 + }, + { + "x": 409, + "y": 314, + "width": 90, + "height": 92 + } + ], + "iShot_2025-10-25_16.56.01.png": [ + { + "x": 119, + "y": 347, + "width": 90, + "height": 88 + }, + { + "x": 393, + "y": 350, + "width": 90, + "height": 89 } ] } \ No newline at end of file diff --git a/images/debug/template-iShot_2025-10-25_16.53.21.png b/images/debug/template-iShot_2025-10-25_16.53.21.png new file mode 100644 index 0000000..7cdddcd Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.53.21.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.53.40.png b/images/debug/template-iShot_2025-10-25_16.53.40.png new file mode 100644 index 0000000..7021691 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.53.40.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.53.48.png b/images/debug/template-iShot_2025-10-25_16.53.48.png new file mode 100644 index 0000000..46936f2 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.53.48.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.53.57.png b/images/debug/template-iShot_2025-10-25_16.53.57.png new file mode 100644 index 0000000..512f35c Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.53.57.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.54.08.png 
b/images/debug/template-iShot_2025-10-25_16.54.08.png new file mode 100644 index 0000000..6029cb6 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.54.08.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.54.15.png b/images/debug/template-iShot_2025-10-25_16.54.15.png new file mode 100644 index 0000000..3117237 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.54.15.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.54.25.png b/images/debug/template-iShot_2025-10-25_16.54.25.png new file mode 100644 index 0000000..97d0cf5 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.54.25.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.54.32.png b/images/debug/template-iShot_2025-10-25_16.54.32.png new file mode 100644 index 0000000..e45e446 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.54.32.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.54.41.png b/images/debug/template-iShot_2025-10-25_16.54.41.png new file mode 100644 index 0000000..c3c446c Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.54.41.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.54.54.png b/images/debug/template-iShot_2025-10-25_16.54.54.png new file mode 100644 index 0000000..053e798 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.54.54.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.55.21.png b/images/debug/template-iShot_2025-10-25_16.55.21.png new file mode 100644 index 0000000..1fba413 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.55.21.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.55.29.png b/images/debug/template-iShot_2025-10-25_16.55.29.png new file mode 100644 index 0000000..16b118d Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.55.29.png differ diff --git 
a/images/debug/template-iShot_2025-10-25_16.55.37.png b/images/debug/template-iShot_2025-10-25_16.55.37.png new file mode 100644 index 0000000..f704c56 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.55.37.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.55.46.png b/images/debug/template-iShot_2025-10-25_16.55.46.png new file mode 100644 index 0000000..7a0246f Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.55.46.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.55.52.png b/images/debug/template-iShot_2025-10-25_16.55.52.png new file mode 100644 index 0000000..db75509 Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.55.52.png differ diff --git a/images/debug/template-iShot_2025-10-25_16.56.01.png b/images/debug/template-iShot_2025-10-25_16.56.01.png new file mode 100644 index 0000000..31fa9cc Binary files /dev/null and b/images/debug/template-iShot_2025-10-25_16.56.01.png differ diff --git a/images/debug/template-滑块-1.png b/images/debug/template-滑块-1.png index 093f664..359f10f 100644 Binary files a/images/debug/template-滑块-1.png and b/images/debug/template-滑块-1.png differ diff --git a/images/debug/template-滑块-2.png b/images/debug/template-滑块-2.png index 5db92c3..d01ed5e 100644 Binary files a/images/debug/template-滑块-2.png and b/images/debug/template-滑块-2.png differ diff --git a/images/debug/template-滑块-3.png b/images/debug/template-滑块-3.png new file mode 100644 index 0000000..57a0a0b Binary files /dev/null and b/images/debug/template-滑块-3.png differ diff --git a/images/debug/template-滑块-4.png b/images/debug/template-滑块-4.png index 6c55392..78beeb3 100644 Binary files a/images/debug/template-滑块-4.png and b/images/debug/template-滑块-4.png differ diff --git a/images/debug/template-滑块-5.png b/images/debug/template-滑块-5.png index e3dbcfd..a7152e3 100644 Binary files a/images/debug/template-滑块-5.png and b/images/debug/template-滑块-5.png differ diff --git 
a/images/debug/template-滑块-6.png b/images/debug/template-滑块-6.png index 881ef08..8fd1b82 100644 Binary files a/images/debug/template-滑块-6.png and b/images/debug/template-滑块-6.png differ diff --git a/images/debug/template-滑块-8.png b/images/debug/template-滑块-8.png index b0d55da..1612bc6 100644 Binary files a/images/debug/template-滑块-8.png and b/images/debug/template-滑块-8.png differ diff --git a/images/debug/template-滑块.png b/images/debug/template-滑块.png index 10a065b..a3743f9 100644 Binary files a/images/debug/template-滑块.png and b/images/debug/template-滑块.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.53.21.png b/images/douban-target/iShot_2025-10-25_16.53.21.png new file mode 100644 index 0000000..b120668 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.53.21.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.53.40.png b/images/douban-target/iShot_2025-10-25_16.53.40.png new file mode 100644 index 0000000..0057202 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.53.40.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.53.48.png b/images/douban-target/iShot_2025-10-25_16.53.48.png new file mode 100644 index 0000000..a9817ed Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.53.48.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.53.57.png b/images/douban-target/iShot_2025-10-25_16.53.57.png new file mode 100644 index 0000000..ed272fd Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.53.57.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.54.08.png b/images/douban-target/iShot_2025-10-25_16.54.08.png new file mode 100644 index 0000000..ddb645f Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.54.08.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.54.15.png b/images/douban-target/iShot_2025-10-25_16.54.15.png new file mode 100644 index 0000000..da6aa7b Binary files /dev/null and 
b/images/douban-target/iShot_2025-10-25_16.54.15.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.54.25.png b/images/douban-target/iShot_2025-10-25_16.54.25.png new file mode 100644 index 0000000..8f76594 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.54.25.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.54.32.png b/images/douban-target/iShot_2025-10-25_16.54.32.png new file mode 100644 index 0000000..2b4a61b Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.54.32.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.54.41.png b/images/douban-target/iShot_2025-10-25_16.54.41.png new file mode 100644 index 0000000..6018283 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.54.41.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.54.54.png b/images/douban-target/iShot_2025-10-25_16.54.54.png new file mode 100644 index 0000000..a3172e3 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.54.54.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.02.png b/images/douban-target/iShot_2025-10-25_16.55.02.png new file mode 100644 index 0000000..b846c19 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.02.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.09.png b/images/douban-target/iShot_2025-10-25_16.55.09.png new file mode 100644 index 0000000..8e1319e Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.09.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.14.png b/images/douban-target/iShot_2025-10-25_16.55.14.png new file mode 100644 index 0000000..d4da2ef Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.14.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.21.png b/images/douban-target/iShot_2025-10-25_16.55.21.png new file mode 100644 index 0000000..0adc010 Binary files /dev/null and 
b/images/douban-target/iShot_2025-10-25_16.55.21.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.29.png b/images/douban-target/iShot_2025-10-25_16.55.29.png new file mode 100644 index 0000000..d0b11b0 Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.29.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.37.png b/images/douban-target/iShot_2025-10-25_16.55.37.png new file mode 100644 index 0000000..d71f15b Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.37.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.46.png b/images/douban-target/iShot_2025-10-25_16.55.46.png new file mode 100644 index 0000000..b8ed8ea Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.46.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.55.52.png b/images/douban-target/iShot_2025-10-25_16.55.52.png new file mode 100644 index 0000000..b91f35f Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.55.52.png differ diff --git a/images/douban-target/iShot_2025-10-25_16.56.01.png b/images/douban-target/iShot_2025-10-25_16.56.01.png new file mode 100644 index 0000000..e9476de Binary files /dev/null and b/images/douban-target/iShot_2025-10-25_16.56.01.png differ diff --git a/images/douban/iShot_2025-10-25_16.53.21.png b/images/douban/iShot_2025-10-25_16.53.21.png new file mode 100644 index 0000000..7e20b28 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.21.png differ diff --git a/images/douban/iShot_2025-10-25_16.53.40.png b/images/douban/iShot_2025-10-25_16.53.40.png new file mode 100644 index 0000000..fc55146 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.40.png differ diff --git a/images/douban/iShot_2025-10-25_16.53.48.png b/images/douban/iShot_2025-10-25_16.53.48.png new file mode 100644 index 0000000..6c70a7d Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.48.png differ diff --git 
a/images/douban/iShot_2025-10-25_16.53.57.png b/images/douban/iShot_2025-10-25_16.53.57.png new file mode 100644 index 0000000..ad78447 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.57.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.08.png b/images/douban/iShot_2025-10-25_16.54.08.png new file mode 100644 index 0000000..c5bc77a Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.08.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.15.png b/images/douban/iShot_2025-10-25_16.54.15.png new file mode 100644 index 0000000..1ec34cf Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.15.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.25.png b/images/douban/iShot_2025-10-25_16.54.25.png new file mode 100644 index 0000000..773a4f8 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.25.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.32.png b/images/douban/iShot_2025-10-25_16.54.32.png new file mode 100644 index 0000000..8cbcd89 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.32.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.41.png b/images/douban/iShot_2025-10-25_16.54.41.png new file mode 100644 index 0000000..f69362f Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.41.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.54.png b/images/douban/iShot_2025-10-25_16.54.54.png new file mode 100644 index 0000000..3645d08 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.54.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.02.png b/images/douban/iShot_2025-10-25_16.55.02.png new file mode 100644 index 0000000..1dc5473 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.02.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.09.png b/images/douban/iShot_2025-10-25_16.55.09.png new file mode 100644 index 0000000..61048e8 Binary files /dev/null and 
b/images/douban/iShot_2025-10-25_16.55.09.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.14.png b/images/douban/iShot_2025-10-25_16.55.14.png new file mode 100644 index 0000000..4f3a33f Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.14.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.21.png b/images/douban/iShot_2025-10-25_16.55.21.png new file mode 100644 index 0000000..ab10598 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.21.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.29.png b/images/douban/iShot_2025-10-25_16.55.29.png new file mode 100644 index 0000000..c23edfc Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.29.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.37.png b/images/douban/iShot_2025-10-25_16.55.37.png new file mode 100644 index 0000000..e617d30 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.37.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.46.png b/images/douban/iShot_2025-10-25_16.55.46.png new file mode 100644 index 0000000..1db4ac7 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.46.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.52.png b/images/douban/iShot_2025-10-25_16.55.52.png new file mode 100644 index 0000000..d7e7127 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.52.png differ diff --git a/images/douban/iShot_2025-10-25_16.56.01.png b/images/douban/iShot_2025-10-25_16.56.01.png new file mode 100644 index 0000000..44f0c75 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.56.01.png differ diff --git a/images/douban2/iShot_2025-10-25_16.53.21.png b/images/douban2/iShot_2025-10-25_16.53.21.png new file mode 100644 index 0000000..7e20b28 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.53.21.png differ diff --git a/images/douban2/iShot_2025-10-25_16.53.40.png b/images/douban2/iShot_2025-10-25_16.53.40.png new file mode 100644 
index 0000000..fc55146 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.53.40.png differ diff --git a/images/douban2/iShot_2025-10-25_16.53.48.png b/images/douban2/iShot_2025-10-25_16.53.48.png new file mode 100644 index 0000000..6c70a7d Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.53.48.png differ diff --git a/images/douban2/iShot_2025-10-25_16.53.57.png b/images/douban2/iShot_2025-10-25_16.53.57.png new file mode 100644 index 0000000..ad78447 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.53.57.png differ diff --git a/images/douban2/iShot_2025-10-25_16.54.08.png b/images/douban2/iShot_2025-10-25_16.54.08.png new file mode 100644 index 0000000..c5bc77a Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.54.08.png differ diff --git a/images/douban2/iShot_2025-10-25_16.54.15.png b/images/douban2/iShot_2025-10-25_16.54.15.png new file mode 100644 index 0000000..1ec34cf Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.54.15.png differ diff --git a/images/douban2/iShot_2025-10-25_16.54.25.png b/images/douban2/iShot_2025-10-25_16.54.25.png new file mode 100644 index 0000000..773a4f8 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.54.25.png differ diff --git a/images/douban2/iShot_2025-10-25_16.54.32.png b/images/douban2/iShot_2025-10-25_16.54.32.png new file mode 100644 index 0000000..8cbcd89 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.54.32.png differ diff --git a/images/douban2/iShot_2025-10-25_16.54.41.png b/images/douban2/iShot_2025-10-25_16.54.41.png new file mode 100644 index 0000000..f69362f Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.54.41.png differ diff --git a/images/douban2/iShot_2025-10-25_16.54.54.png b/images/douban2/iShot_2025-10-25_16.54.54.png new file mode 100644 index 0000000..3645d08 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.54.54.png differ diff --git 
a/images/douban2/iShot_2025-10-25_16.55.02.png b/images/douban2/iShot_2025-10-25_16.55.02.png new file mode 100644 index 0000000..1dc5473 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.02.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.09.png b/images/douban2/iShot_2025-10-25_16.55.09.png new file mode 100644 index 0000000..61048e8 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.09.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.14.png b/images/douban2/iShot_2025-10-25_16.55.14.png new file mode 100644 index 0000000..4f3a33f Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.14.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.21.png b/images/douban2/iShot_2025-10-25_16.55.21.png new file mode 100644 index 0000000..ab10598 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.21.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.29.png b/images/douban2/iShot_2025-10-25_16.55.29.png new file mode 100644 index 0000000..c23edfc Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.29.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.37.png b/images/douban2/iShot_2025-10-25_16.55.37.png new file mode 100644 index 0000000..e617d30 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.37.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.46.png b/images/douban2/iShot_2025-10-25_16.55.46.png new file mode 100644 index 0000000..1db4ac7 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.46.png differ diff --git a/images/douban2/iShot_2025-10-25_16.55.52.png b/images/douban2/iShot_2025-10-25_16.55.52.png new file mode 100644 index 0000000..d7e7127 Binary files /dev/null and b/images/douban2/iShot_2025-10-25_16.55.52.png differ diff --git a/images/douban2/iShot_2025-10-25_16.56.01.png b/images/douban2/iShot_2025-10-25_16.56.01.png new file mode 100644 index 0000000..44f0c75 Binary files 
/dev/null and b/images/douban2/iShot_2025-10-25_16.56.01.png differ diff --git a/images/output-cv/滑块-1.png b/images/output-cv/滑块-1.png deleted file mode 100644 index 2265cee..0000000 Binary files a/images/output-cv/滑块-1.png and /dev/null differ diff --git a/images/output-cv/滑块-4.png b/images/output-cv/滑块-4.png deleted file mode 100644 index 6315b3c..0000000 Binary files a/images/output-cv/滑块-4.png and /dev/null differ diff --git a/images/output-cv/滑块.png b/images/output-cv/滑块.png deleted file mode 100644 index 9888372..0000000 Binary files a/images/output-cv/滑块.png and /dev/null differ diff --git a/images/output-edge/滑块-1.png b/images/output-edge/滑块-1.png deleted file mode 100644 index 7e5f36e..0000000 Binary files a/images/output-edge/滑块-1.png and /dev/null differ diff --git a/images/output-edge/滑块-2.png b/images/output-edge/滑块-2.png deleted file mode 100644 index 3da96ea..0000000 Binary files a/images/output-edge/滑块-2.png and /dev/null differ diff --git a/images/output-edge/滑块-3.png b/images/output-edge/滑块-3.png deleted file mode 100644 index b6b1d20..0000000 Binary files a/images/output-edge/滑块-3.png and /dev/null differ diff --git a/images/output-edge/滑块-4.png b/images/output-edge/滑块-4.png deleted file mode 100644 index cb2ba29..0000000 Binary files a/images/output-edge/滑块-4.png and /dev/null differ diff --git a/images/output-edge/滑块-5.png b/images/output-edge/滑块-5.png deleted file mode 100644 index 0ce2e52..0000000 Binary files a/images/output-edge/滑块-5.png and /dev/null differ diff --git a/images/output-edge/滑块-6.png b/images/output-edge/滑块-6.png deleted file mode 100644 index 67e514f..0000000 Binary files a/images/output-edge/滑块-6.png and /dev/null differ diff --git a/images/output/iShot_2025-10-25_16.53.21.png b/images/output/iShot_2025-10-25_16.53.21.png new file mode 100644 index 0000000..4d744cd Binary files /dev/null and b/images/output/iShot_2025-10-25_16.53.21.png differ diff --git a/images/output/iShot_2025-10-25_16.53.40.png 
b/images/output/iShot_2025-10-25_16.53.40.png new file mode 100644 index 0000000..5e9a14e Binary files /dev/null and b/images/output/iShot_2025-10-25_16.53.40.png differ diff --git a/images/output/iShot_2025-10-25_16.53.48.png b/images/output/iShot_2025-10-25_16.53.48.png new file mode 100644 index 0000000..18fa36c Binary files /dev/null and b/images/output/iShot_2025-10-25_16.53.48.png differ diff --git a/images/output/iShot_2025-10-25_16.53.57.png b/images/output/iShot_2025-10-25_16.53.57.png new file mode 100644 index 0000000..768b481 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.53.57.png differ diff --git a/images/output/iShot_2025-10-25_16.54.08.png b/images/output/iShot_2025-10-25_16.54.08.png new file mode 100644 index 0000000..9a6366f Binary files /dev/null and b/images/output/iShot_2025-10-25_16.54.08.png differ diff --git a/images/output/iShot_2025-10-25_16.54.15.png b/images/output/iShot_2025-10-25_16.54.15.png new file mode 100644 index 0000000..ca88961 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.54.15.png differ diff --git a/images/output/iShot_2025-10-25_16.54.25.png b/images/output/iShot_2025-10-25_16.54.25.png new file mode 100644 index 0000000..27f0b07 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.54.25.png differ diff --git a/images/output/iShot_2025-10-25_16.54.32.png b/images/output/iShot_2025-10-25_16.54.32.png new file mode 100644 index 0000000..9f93dff Binary files /dev/null and b/images/output/iShot_2025-10-25_16.54.32.png differ diff --git a/images/output/iShot_2025-10-25_16.54.41.png b/images/output/iShot_2025-10-25_16.54.41.png new file mode 100644 index 0000000..420ca78 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.54.41.png differ diff --git a/images/output/iShot_2025-10-25_16.54.54.png b/images/output/iShot_2025-10-25_16.54.54.png new file mode 100644 index 0000000..b3c6bc3 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.54.54.png differ diff --git 
a/images/output/iShot_2025-10-25_16.55.21.png b/images/output/iShot_2025-10-25_16.55.21.png new file mode 100644 index 0000000..f93437b Binary files /dev/null and b/images/output/iShot_2025-10-25_16.55.21.png differ diff --git a/images/output/iShot_2025-10-25_16.55.29.png b/images/output/iShot_2025-10-25_16.55.29.png new file mode 100644 index 0000000..030976b Binary files /dev/null and b/images/output/iShot_2025-10-25_16.55.29.png differ diff --git a/images/output/iShot_2025-10-25_16.55.37.png b/images/output/iShot_2025-10-25_16.55.37.png new file mode 100644 index 0000000..d967d58 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.55.37.png differ diff --git a/images/output/iShot_2025-10-25_16.55.46.png b/images/output/iShot_2025-10-25_16.55.46.png new file mode 100644 index 0000000..5526860 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.55.46.png differ diff --git a/images/output/iShot_2025-10-25_16.55.52.png b/images/output/iShot_2025-10-25_16.55.52.png new file mode 100644 index 0000000..09342db Binary files /dev/null and b/images/output/iShot_2025-10-25_16.55.52.png differ diff --git a/images/output/iShot_2025-10-25_16.56.01.png b/images/output/iShot_2025-10-25_16.56.01.png new file mode 100644 index 0000000..c48d510 Binary files /dev/null and b/images/output/iShot_2025-10-25_16.56.01.png differ diff --git a/images/output/滑块-1.png b/images/output/滑块-1.png index 50f9190..bb0e128 100644 Binary files a/images/output/滑块-1.png and b/images/output/滑块-1.png differ diff --git a/images/output/滑块-2.png b/images/output/滑块-2.png index c626d46..a94289b 100644 Binary files a/images/output/滑块-2.png and b/images/output/滑块-2.png differ diff --git a/images/output/滑块-3.png b/images/output/滑块-3.png index edd483d..d5098ea 100644 Binary files a/images/output/滑块-3.png and b/images/output/滑块-3.png differ diff --git a/images/output/滑块-4.png b/images/output/滑块-4.png index 83721d6..445682a 100644 Binary files a/images/output/滑块-4.png and 
b/images/output/滑块-4.png differ diff --git a/images/output/滑块-5.png b/images/output/滑块-5.png index a6a0eb8..b87461b 100644 Binary files a/images/output/滑块-5.png and b/images/output/滑块-5.png differ diff --git a/images/output/滑块-6.png b/images/output/滑块-6.png index b3f0b84..c7d0745 100644 Binary files a/images/output/滑块-6.png and b/images/output/滑块-6.png differ diff --git a/images/output/滑块-7.png b/images/output/滑块-7.png index e313dc9..bc9671d 100644 Binary files a/images/output/滑块-7.png and b/images/output/滑块-7.png differ diff --git a/images/output/滑块-8.png b/images/output/滑块-8.png index 2fedeba..5b04e53 100644 Binary files a/images/output/滑块-8.png and b/images/output/滑块-8.png differ diff --git a/images/output/滑块.png b/images/output/滑块.png index 4ccc9fd..1c79267 100644 Binary files a/images/output/滑块.png and b/images/output/滑块.png differ diff --git a/package.json b/package.json index fd0d2c4..ffc5db1 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "main": "index.js", "scripts": { "build": "tsc", - "detect": "ts-node --transpile-only src/detector.ts" + "detect": "ts-node --transpile-only src/cli.ts" }, "keywords": [], "author": "", diff --git a/release.md b/release.md new file mode 100644 index 0000000..dce7523 --- /dev/null +++ b/release.md @@ -0,0 +1,21 @@ +# 版本说明 + +## v1.0.0 + +发布日期:2025-xx-xx + +### 新增 +- 全新模块化检测管线:`SliderDetector` 结合多策略候选搜索与精炼流程。 +- 自学习滑块搜索器:在首个候选基础上自动寻找第二个滑块。 +- CLI 工具 `npm run detect`:集成 ground-truth 验证、批量检测与自定义目录参数。 +- `validator` 验证器:统一的 IoU 与容差匹配统计输出。 + +### 改进 +- 候选区域生成策略加入暗/亮阈值扫描、Sobel 边缘、颜色量化与 LAB 对比。 +- 候选评分整合颜色一致性、边缘平滑度、内部纹理稀疏度等权重。 +- 文档重构:README、spec 更新,新增开发模块说明。 + +### 兼容性 +- 运行环境:Node.js 18+。 +- 仍依赖 `sharp` 库,无额外原生依赖改动。 + diff --git a/spec.md b/spec.md index d841ca8..36a236c 100644 --- a/spec.md +++ b/spec.md @@ -1,140 +1,42 @@ +## 项目定位 -## 需求 -基于sharp图像处理库,实现图片中的滑块精确识别。 +豆瓣滑块验证码检测器使用 Node.js + Sharp,对静态截图中的滑块缺口进行自动定位。当前版本聚焦豆瓣样式的双滑块图片,要求识别 1~2 个滑块并输出带标注的结果图。 -## 滑块形状图片和标注图片 
-滑块抠图放在images/slider目录中,这些图片是滑块的形状。注意:方形滑块的某些边可能有半圆形凹陷或凸起。 -标注滑块的图片放在images/target中,图中标有target文字的框框是目标。注意:要识别的不是target框,而是target所框的滑块。 +## 数据输入 -## 程序识别标准 -images/douban-target下放置手工红框标注的图片,**目标在红色框框内**,作为算法生成结果的比对。 -如果同一张图片程序生成框和人工框匹配,通过测试,否则继续优化。 -注意:不是下方绿色的滑块,是红框内的滑块。 +- `images/douban/`:待检测图片,命令行默认扫描该目录。 +- `images/douban-target/`:对应图片的人工红框标注,用于准确性验证。 +- `images/output/`:检测结果输出目录(自动生成,蓝框标注)。 +- 自定义目录可通过 `npm run detect -- --pic-dir=<相对路径>` 指定。 -## 豆瓣滑块截图 -images/douban目录放置豆瓣网滑块真实截图。 -完成代码后,使用该目录的图片中做验证,并用蓝色方框框出滑块准确位置,输出到images/output目录。 +## 检测要求 -## 思路 -先识别douban-target中的红框手工标注的位置,作为基准,检验output中的标注与基准比较,来确定是否正确识别 -douban-target中的图片是手工标注,所以允许output标注结果与基准有少量偏差,比如10px +1. 支持 1~2 个滑块的定位,允许图片存在缺失或额外背景元素。 +2. 输出边界框的中心点与人工标注的中心点偏差应 ≤ 10 px,IoU ≥ 0.1 视为可接受匹配。 +3. 允许算法返回多个候选框;验证阶段会进行一对一匹配并统计准确率、召回率。 +4. 结果需在原图上以蓝色矩形绘制并写入 `images/output/`。 -改进思路: -1. 不能根据亮度,滑块的亮度不确定。 -2. 图片中滑块只有2个 -3. 滑块形状是正方形,其中2个边,有半圆凹陷或凸起。 -最佳方法是边缘检测。滑块边框色调基本一致。 +## 算法概览 +1. **预处理**:同时读取原始图与归一化图,生成原始 `RawImage`、增强版图像。 +2. **候选生成**(`src/detection/candidate-search.ts`): + - 多阈值暗/亮区域扫描 → 连通域过滤 + - Sobel 边缘图 → 形态学闭运算补全边界 + - 颜色量化 + LAB 色差检测 → 识别低对比度候选 + - IoU 去重后,依据正方形程度、颜色一致性、内部边缘稀疏度、外部梯度平滑度重新评分。 +3. **框体精炼**:使用边缘投影收紧候选框边界。 +4. **自学习扩展**(`src/detector-self-learning.ts`):将最佳候选作为模板,在同一水平带上搜索第二个滑块。 +5.
**结果绘制**:`SliderDetector.annotate` 在输出图片上渲染所有有效框体。 -## 网友思路,滑块预处理 +## 验证流程 -先提取一下滑块的轮廓,抖音的滑块特征很明显,可以不用cv2.Canny来提取边缘特征。 +- CLI (`src/cli.ts`) 会加载 `ground-truth.json`,对默认目录进行逐图检测。 +- 使用 `SliderValidator` 计算匹配情况,终端输出每张图与总体的准确率、召回率。 +- 若指定 `--pic-dir`,仍会先运行 ground-truth 评估(若文件存在),再处理目标目录。 -具体步骤如下: - - 去除外围透明像素点(滑块外层的像素点的a值都是0) - - 将图片转成灰度图并进行二值化操作(0和255) - - 只保留二值化为255的像素点 - - 去除多余噪声 - -### 读取rgba格式的滑块 -import cv2 -input_img = cv2.imread("slide.png", cv2.IMREAD_UNCHANGED) -将透明值为0的像素点设置为纯黑色 - -### 取透明维度的值 -alpha_channel = input_img[:, :, 3] - -### 只使用rgb三个维度的值 -rgb_image = input_img[:, :, :3] -rgb_image[alpha_channel == 0] = [0, 0, 0] -提取白色边缘并设置成黑色,将其他像素点设置为白色 - -gray = cv2.cvtColor(rgb_image, cv2.COLOR_BGR2GRAY) -_, thresholded = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY) -white_img = np.ones_like(rgb_image) * 255 -white_img[thresholded == 255] = [0, 0, 0] - -### 去除噪声(判断某个黑色像素点周围3x3范围内有多少个黑色像素点,少于阈值认为是噪声) - -def count_black_neighbors_by_cv2(gray_image): - if gray_image.ndim == 3: - gray_image = cv2.cvtColor(gray_image, cv2.COLOR_BGR2GRAY) - _, binary_image = cv2.threshold(gray_image, 240, 255, cv2.THRESH_BINARY_INV) - binary_image = binary_image // 255 - kernel = np.ones((3, 3), dtype=np.uint8) - kernel[1, 1] = 0 - black_neighbors = cv2.filter2D(binary_image, -1, kernel) - # 设置边缘为0 - black_neighbors[:, 0] = 0 - black_neighbors[:, 109] = 0 - return black_neighbors - -当然也可以通过遍历来实现,这样更容易理解点 - -def count_black_neighbors_by_range(gray_image): - # 将图像转换为灰度图 - if len(gray_image.shape) == 3: - gray_image = cv2.cvtColor(gray_image, cv2.COLOR_BGR2GRAY) - # 二值化图像 - _, binary_image = cv2.threshold(gray_image, 240, 255, cv2.THRESH_BINARY_INV) - binary_image = binary_image // 255 - # 创建一个与输入图像大小相同的全零数组 - black_neighbors = np.zeros_like(binary_image) - - # 遍历图像中的3x3邻域,计算每个像素 - neighbor_offsets = [(-1, -1), (-1, 0), (-1, 1), - (0, -1), (0, 1), - (1, -1), (1, 0), (1, 1)] - - # 遍历每个像素 - rows, cols = binary_image.shape - for row in range(1, rows - 1): - for col in range(1, cols - 
1): - # 当它本身不是黑色像素点的时候,就不计算 - if binary_image[row, col] != 1: - continue - count = 0 - for offset in neighbor_offsets: - neighbor_row, neighbor_col = row + offset[0], col + offset[1] - if binary_image[neighbor_row, neighbor_col] == 1: - count += 1 - black_neighbors[row, col] = count - - return black_neighbors - -black_neighbors = count_black_neighbors_by_range(white_img) -output = np.ones_like(rgb_image) * 255 -output[black_neighbors > 4] = 0 - - -好了,现在可以把上面看到的内容忘掉了,因为在实际识别的时候用不到(我发现不做处理比做处理识别的准确率要高很多),直接识别准确率甚至接近百分百了。 - -### 下面是识别的完整代码 - -import os -import cv2 - -def get_slide_distance(bg_path, slide_path): - ''' - 识别滑块具体位置,返回位置比例: 位置/图片宽度 - 使用的时候再乘以实际图片宽度即可 - ''' - bg_img = cv2.imread(bg_path) - sd_img = cv2.imread(slide_path) - bg_gray = cv2.cvtColor(bg_img, cv2.COLOR_BGR2GRAY) - bg_gray = cv2.GaussianBlur(bg_gray, (5, 5), 0) - bg_edge = cv2.Canny(bg_gray, 30, 100) - rgb_bg_gray = cv2.cvtColor(bg_edge, cv2.COLOR_GRAY2RGB) - - sd_gray = cv2.cvtColor(sd_img, cv2.COLOR_BGR2GRAY) - sd_gray = cv2.GaussianBlur(sd_gray, (5, 5), 0) - sd_edge = cv2.Canny(sd_gray, 30, 100) - rgb_sd_gray = cv2.cvtColor(sd_edge, cv2.COLOR_GRAY2RGB) - result = cv2.matchTemplate(rgb_bg_gray, rgb_sd_gray, cv2.TM_CCORR_NORMED) - _, _, _, max_loc = cv2.minMaxLoc(result) - cv2.rectangle(bg_img, (max_loc[0], max_loc[1]), (max_loc[0]+110, max_loc[1] + 110), - (0, 255, 0), 2) - result_path = os.path.join(os.path.dirname(bg_path), "result.png") - cv2.imwrite(result_path, bg_img) - return max_loc[0]/bg_gray.shape[1] +## 开发约定 +- 代码组织遵循模块划分:`detector.ts`(主检测器)、`detection/`(候选策略)、`utils/`(基础工具)、`validator.ts`(验证)。 +- 新增检测策略时建议在 `candidate-search.ts` 中扩展,对应评分指标需保持可调节权重。 +- 所有新增图像应放入 `images/douban` / `images/douban-target` 并更新 `ground-truth.json`。 +- 运行 `npm run build` 进行类型检查;`npm run detect` 验收算法表现。 diff --git a/src/analyze-6.ts b/src/analyze-6.ts deleted file mode 100644 index e0b2579..0000000 --- a/src/analyze-6.ts +++ /dev/null @@ -1,75 +0,0 @@ -import sharp from 'sharp'; -import * as path from 
'path'; - -async function analyze() { - const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块-6.png'); - const { data, info } = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true }); - const { width, height, channels } = info; - - console.log(`图片尺寸: ${width}x${height}`); - - const darkThreshold = 85; - const darkMap = new Uint8Array(width * height); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx], g = data[idx + 1], b = data[idx + 2]; - const brightness = r * 0.299 + g * 0.587 + b * 0.114; - darkMap[y * width + x] = brightness < darkThreshold ? 1 : 0; - } - } - - // 找连通区域 - const visited = new Uint8Array(width * height); - const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - if (visited[idx] === 0 && darkMap[idx] === 1) { - let minX = x, minY = y, maxX = x, maxY = y, pixelCount = 0; - const stack: Array<[number, number]> = [[x, y]]; - - while (stack.length > 0) { - const [cx, cy] = stack.pop()!; - if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; - const cidx = cy * width + cx; - if (visited[cidx] === 1 || darkMap[cidx] !== 1) continue; - - visited[cidx] = 1; - pixelCount++; - minX = Math.min(minX, cx); - minY = Math.min(minY, cy); - maxX = Math.max(maxX, cx); - maxY = Math.max(maxY, cy); - - stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); - } - - const w = maxX - minX + 1; - const h = maxY - minY + 1; - if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) { - regions.push({x: minX, y: minY, w, h, pixels: pixelCount}); - } - } - } - } - - console.log(`\n找到 ${regions.length} 个区域`); - regions.sort((a, b) => b.pixels - a.pixels); - - console.log('\n所有区域(按面积排序):'); - regions.forEach((r, i) => { - const aspectRatio = r.w / r.h; - const density = r.pixels / (r.w * r.h); - 
const match = - r.w >= 50 && r.w <= 95 && - r.h >= 50 && r.h <= 95 && - aspectRatio >= 0.85 && aspectRatio <= 1.18 && - density > 0.65; - console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)} ${match ? '✓' : ''}`); - }); -} - -analyze().catch(console.error); diff --git a/src/analyze-pixel.ts b/src/analyze-pixel.ts deleted file mode 100644 index ff36beb..0000000 --- a/src/analyze-pixel.ts +++ /dev/null @@ -1,91 +0,0 @@ -import sharp from 'sharp'; -import * as path from 'path'; - -/** - * 分析特定区域的像素亮度 - */ -async function analyzeRegion(imagePath: string, regions: Array<{name: string, x: number, y: number, width: number, height: number}>) { - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width: imgWidth, height: imgHeight, channels } = info; - - console.log(`\n分析图片: ${path.basename(imagePath)}`); - console.log(`图片尺寸: ${imgWidth}×${imgHeight}\n`); - - for (const region of regions) { - console.log(`区域: ${region.name}`); - console.log(` 位置: (${region.x}, ${region.y}), 大小: ${region.width}×${region.height}`); - - let totalBrightness = 0; - let darkCount = 0; // < 90 - let count = 0; - let minBright = 255; - let maxBright = 0; - - for (let y = region.y; y < region.y + region.height && y < imgHeight; y++) { - for (let x = region.x; x < region.x + region.width && x < imgWidth; x++) { - const idx = (y * imgWidth + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = r * 0.299 + g * 0.587 + b * 0.114; - - totalBrightness += brightness; - count++; - - if (brightness < 90) darkCount++; - minBright = Math.min(minBright, brightness); - maxBright = Math.max(maxBright, brightness); - } - } - - const avgBrightness = count > 0 ? totalBrightness / count : 0; - const darkRatio = count > 0 ? 
(darkCount / count * 100) : 0; - - console.log(` 平均亮度: ${avgBrightness.toFixed(1)}`); - console.log(` 亮度范围: ${minBright.toFixed(0)} - ${maxBright.toFixed(0)}`); - console.log(` 暗像素比例(<90): ${darkRatio.toFixed(1)}%`); - console.log(); - } -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - - // 分析几个关键图片的特定区域 - - // 滑块.png - 完全未检测到 - await analyzeRegion( - path.join(baseDir, 'images', 'douban', '滑块.png'), - [ - { name: '左侧滑块', x: 131, y: 408, width: 87, height: 88 }, - { name: '右侧滑块', x: 375, y: 407, width: 88, height: 89 }, - { name: '背景区域', x: 300, y: 200, width: 50, height: 50 } - ] - ); - - // 滑块-2.png - 只检测到1个,漏检2个 - await analyzeRegion( - path.join(baseDir, 'images', 'douban', '滑块-2.png'), - [ - { name: '左侧滑块', x: 125, y: 245, width: 89, height: 91 }, - { name: '右侧滑块', x: 454, y: 244, width: 90, height: 92 }, - { name: '误检区域', x: 660, y: 164, width: 78, height: 51 } - ] - ); - - // 滑块-6.png - 检测到2个但都是误检 - await analyzeRegion( - path.join(baseDir, 'images', 'douban', '滑块-6.png'), - [ - { name: '左侧目标', x: 116, y: 319, width: 91, height: 91 }, - { name: '右侧目标', x: 574, y: 318, width: 92, height: 92 }, - { name: '误检1', x: 149, y: 456, width: 95, height: 107 }, - { name: '误检2', x: 68, y: 437, width: 74, height: 126 } - ] - ); -} - -main().catch(console.error); diff --git a/src/analyze.ts b/src/analyze.ts deleted file mode 100644 index 6cd1910..0000000 --- a/src/analyze.ts +++ /dev/null @@ -1,61 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -async function analyzeRedBox(imagePath: string) { - const basename = path.basename(imagePath); - console.log(`\n分析: ${basename}`); - - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 查找红色像素 - let minX = width; - let minY = height; - let maxX = 0; - let maxY = 0; - let foundRed = false; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; 
x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - if (r > 200 && g < 100 && b < 100) { - foundRed = true; - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - } - } - } - - if (foundRed) { - const boxWidth = maxX - minX + 1; - const boxHeight = maxY - minY + 1; - console.log(` 红框位置: [x=${minX}, y=${minY}, w=${boxWidth}, h=${boxHeight}]`); - console.log(` 宽高比: ${(boxWidth / boxHeight).toFixed(2)}`); - console.log(` 相对位置: y=${(minY / height * 100).toFixed(1)}% (高度)`); - } else { - console.log(` 未找到红框`); - } -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const targetDir = path.join(baseDir, 'images', 'douban-target'); - - const files = fs.readdirSync(targetDir).filter(f => f.endsWith('.png')).slice(0, 9); - - for (const file of files) { - await analyzeRedBox(path.join(targetDir, file)); - } -} - -main().catch(console.error); diff --git a/src/cli.ts b/src/cli.ts new file mode 100644 index 0000000..a841bb9 --- /dev/null +++ b/src/cli.ts @@ -0,0 +1,224 @@ +import fs from 'fs'; +import path from 'path'; +import { SliderDetector } from './detector'; +import { SliderValidator } from './validator'; +import { BoundingBox, Rectangle } from './types'; + +/** Expected slider boxes keyed by image file name, as parsed from ground-truth.json. NOTE(review): the type arguments in this file were reconstructed from usage; confirm against ./types. */ +type GroundTruth = Record<string, Rectangle[]>; + +/** CLI entry point: evaluates against ground-truth.json when it can be loaded, then annotates the PNGs of the default directory or of the one given by --pic-dir. */ +async function main() { + const detector = new SliderDetector(); + const validator = new SliderValidator(); + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + const outputDir = path.join(baseDir, 'images', 'output'); + const groundTruthPath = path.join(baseDir, 'ground-truth.json'); + /* Cache results per image path so evaluation and annotation share one detection run. */ + const detectionCache = new Map<string, BoundingBox[] | null>(); + + const detect = async (imagePath: string): Promise<BoundingBox[] | null> => { + if (!detectionCache.has(imagePath)) { + const result = await detector.detectSlider(imagePath, undefined, true); + detectionCache.set(imagePath, result); + } + return detectionCache.get(imagePath)!; +
}; + + console.log('=== 滑块检测 CLI ===\n'); + + const groundTruth = loadGroundTruth(groundTruthPath); + if (groundTruth) { + await evaluateAgainstGroundTruth({ + doubanDir, + groundTruth, + detect, + validator, + }); + } else { + console.log('未找到 ground-truth.json,跳过准确性验证。\n'); + } + + const customArg = process.argv.find((arg) => arg.startsWith('--pic-dir=')); + /* Keep everything after the "--pic-dir=" prefix: split('=')[1] would cut a directory name at its first '='. */ + const processDir = customArg + ? path.join(baseDir, customArg.slice('--pic-dir='.length)) + : doubanDir; + const processDirName = customArg ? customArg.slice('--pic-dir='.length) : 'images/douban'; + + await ensureDir(outputDir); + await processDirectory({ + processDir, + processDirName, + outputDir, + detect, + detector, + }); + + console.log('\n=== 检测完成 ==='); +} + +/** Reads and parses the ground-truth file; returns null when the file is missing, and warns and returns null when reading or JSON parsing throws. */ +function loadGroundTruth(filePath: string): GroundTruth | null { + if (!fs.existsSync(filePath)) { + return null; + } + try { + const content = fs.readFileSync(filePath, 'utf-8'); + return JSON.parse(content) as GroundTruth; + } catch (error) { + console.warn(`无法解析 ground-truth.json:${error}`); + return null; + } +} + +/** Creates dir, including missing parent directories; an existing directory is not an error. */ +async function ensureDir(dir: string): Promise<void> { + await fs.promises.mkdir(dir, { recursive: true }); +} + +/** Runs detection on every ground-truth image that exists in doubanDir and prints per-image and overall precision/recall using a 10 px tolerance. */ +async function evaluateAgainstGroundTruth({ + doubanDir, + groundTruth, + detect, + validator, +}: { + doubanDir: string; + groundTruth: GroundTruth; + detect: (imagePath: string) => Promise<BoundingBox[] | null>; + validator: SliderValidator; +}): Promise<void> { + console.log('1. 验证算法准确性(容差:10px)...\n'); + + let totalMatched = 0; + let totalTargets = 0; + let totalDetected = 0; + + for (const [fileName, expectedBoxes] of Object.entries(groundTruth)) { + const imagePath = path.join(doubanDir, fileName); + if (!fs.existsSync(imagePath)) { + console.log(` 跳过 ${fileName}(原图不存在)`); + continue; + } + + const detections = await detect(imagePath); + const detectedBoxes = Array.isArray(detections) ?
detections : []; + + const result = await validator.validateDetection( + detectedBoxes, + expectedBoxes, + 10 + ); + + console.log(` ${fileName}:`); + console.log( + ` 目标 ${result.totalTargets} 个 | 检测 ${result.detectedCount} 个 | 匹配 ${result.matchedCount} 个` + ); + console.log( + ` 准确率: ${(result.precision * 100).toFixed(1)}% | 召回率: ${(result.recall * 100).toFixed(1)}%` + ); + + if (result.matches.length > 0) { + result.matches.forEach((match, index) => { + console.log( + ` 匹配 ${index + 1}: IoU=${match.iou.toFixed(3)}` + ); + }); + } + + const missed = result.totalTargets - result.matchedCount; + if (missed > 0) { + console.log(` ⚠️ 漏检 ${missed} 个滑块`); + } + + if (result.unmatched.length > 0) { + console.log(` ⚠️ 误检 ${result.unmatched.length} 个滑块`); + } + + console.log(''); + + totalMatched += result.matchedCount; + totalTargets += result.totalTargets; + totalDetected += result.detectedCount; + } + + if (totalTargets > 0) { + const overallPrecision = + totalDetected > 0 ? (totalMatched / totalDetected) * 100 : 0; + const overallRecall = (totalMatched / totalTargets) * 100; + + console.log('总体统计:'); + console.log(` 总目标数: ${totalTargets}`); + console.log(` 总检测数: ${totalDetected}`); + console.log(` 匹配成功: ${totalMatched}`); + console.log( + ` 总体准确率: ${overallPrecision.toFixed(1)}% | 总体召回率: ${overallRecall.toFixed(1)}%\n` + ); + } else { + console.log(' ground-truth.json 中没有记录可供验证。\n'); + } +} + +/** Detects sliders in every .png file of processDir (extension matched case-insensitively) and writes an annotated copy to outputDir for each image with at least one detection. */ +async function processDirectory({ + processDir, + processDirName, + outputDir, + detect, + detector, +}: { + processDir: string; + processDirName: string; + outputDir: string; + detect: (imagePath: string) => Promise<BoundingBox[] | null>; + detector: SliderDetector; +}): Promise<void> { + console.log(`2.
处理 ${processDirName} 目录下的滑块图片...\n`); + + if (!fs.existsSync(processDir)) { + console.log(` 错误:找不到目录 ${processDir}`); + return; + } + + const files = fs + .readdirSync(processDir) + .filter((file) => file.toLowerCase().endsWith('.png')); + + if (files.length === 0) { + console.log(' 没有找到需要处理的图片。'); + return; + } + + let processed = 0; + for (const file of files) { + const inputPath = path.join(processDir, file); + const outputPath = path.join(outputDir, file); + + const detections = await detect(inputPath); + + if (detections && detections.length > 0) { + await detector.annotate(inputPath, detections, outputPath); + const boxSummary = detections + .map( + (box, index) => + `#${index + 1}[x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]` + ) + .join(', '); + console.log(` ✅ ${file}: 检测到 ${detections.length} 个滑块 ${boxSummary}`); + processed++; + } else { + console.log(` ❌ ${file}: 未检测到滑块`); + } + } + + console.log( + `\n 处理完成: ${processed}/${files.length} 张图片,结果输出到 ${outputDir}` + ); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/src/debug-failed.ts b/src/debug-failed.ts deleted file mode 100644 index c1ad8df..0000000 --- a/src/debug-failed.ts +++ /dev/null @@ -1,128 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -async function debugSingle(imagePath: string) { - const basename = path.basename(imagePath); - console.log(`\n=== ${basename} ===`); - - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - const darkThreshold = 85; - const darkMap = new Uint8Array(width * height); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = (r * 0.299 + g * 0.587 + b * 0.114); - - darkMap[y * width + x] = brightness <
darkThreshold ? 1 : 0; - } - } - - const visited = new Uint8Array(width * height); - const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - if (visited[idx] === 0 && darkMap[idx] === 1) { - let minX = x, minY = y, maxX = x, maxY = y, pixelCount = 0; - const stack: Array<[number, number]> = [[x, y]]; - - while (stack.length > 0) { - const [cx, cy] = stack.pop()!; - if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; - const cidx = cy * width + cx; - if (visited[cidx] === 1 || darkMap[cidx] !== 1) continue; - - visited[cidx] = 1; - pixelCount++; - minX = Math.min(minX, cx); - minY = Math.min(minY, cy); - maxX = Math.max(maxX, cx); - maxY = Math.max(maxY, cy); - - stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); - } - - const w = maxX - minX + 1; - const h = maxY - minY + 1; - if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) { - regions.push({x: minX, y: minY, w, h, pixels: pixelCount}); - } - } - } - } - - console.log(`找到 ${regions.length} 个有效连通区域`); - - // 过滤符合条件的候选 - const candidates = regions.filter(r => { - const aspectRatio = r.w / r.h; - const density = r.pixels / (r.w * r.h); - const centerY = r.y + r.h / 2; - - return ( - r.w >= 50 && r.w <= 95 && - r.h >= 50 && r.h <= 95 && - aspectRatio >= 0.85 && aspectRatio <= 1.18 && - centerY > height * 0.12 && - centerY < height * 0.78 && - density > 0.65 - ); - }); - - console.log(`符合严格条件的候选: ${candidates.length} 个`); - - if (candidates.length > 0) { - candidates.forEach((r, i) => { - const aspectRatio = r.w / r.h; - const density = r.pixels / (r.w * r.h); - console.log(` ${i + 1}. 
[x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`); - }); - } else { - // 尝试放宽条件 - const relaxed = regions.filter(r => { - const aspectRatio = r.w / r.h; - const density = r.pixels / (r.w * r.h); - - return ( - r.w >= 45 && r.w <= 100 && - r.h >= 45 && r.h <= 100 && - aspectRatio >= 0.75 && aspectRatio <= 1.33 && - r.y < height * 0.82 && - r.y > height * 0.06 && - density > 0.55 - ); - }); - - console.log(`符合放宽条件的候选: ${relaxed.length} 个`); - relaxed.slice(0, 5).forEach((r, i) => { - const aspectRatio = r.w / r.h; - const density = r.pixels / (r.w * r.h); - console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`); - }); - } -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - - // 检查未检测到的图片 - const failedFiles = ['滑块-2.png', '滑块-3.png', '滑块-6.png', '滑块-7.png', '滑块.png']; - - for (const file of failedFiles) { - await debugSingle(path.join(doubanDir, file)); - } -} - -main().catch(console.error); diff --git a/src/debug-regions.ts b/src/debug-regions.ts deleted file mode 100644 index 6438a89..0000000 --- a/src/debug-regions.ts +++ /dev/null @@ -1,98 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -async function debugRegions(imagePath: string) { - const basename = path.basename(imagePath); - console.log(`\n=== ${basename} ===`); - - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 检测暗色像素 - const darkThreshold = 85; - const darkMap = new Uint8Array(width * height); - let darkPixelCount = 0; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = (r * 0.299 + g * 0.587 + b 
* 0.114); - - if (brightness < darkThreshold) { - darkMap[y * width + x] = 1; - darkPixelCount++; - } - } - } - - console.log(`暗色像素占比: ${(darkPixelCount / (width * height) * 100).toFixed(2)}%`); - - // 找连通区域(简化版) - const visited = new Uint8Array(width * height); - const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - if (visited[idx] === 0 && darkMap[idx] === 1) { - let minX = x, minY = y, maxX = x, maxY = y, pixelCount = 0; - const stack: Array<[number, number]> = [[x, y]]; - - while (stack.length > 0) { - const [cx, cy] = stack.pop()!; - if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; - const cidx = cy * width + cx; - if (visited[cidx] === 1 || darkMap[cidx] !== 1) continue; - - visited[cidx] = 1; - pixelCount++; - minX = Math.min(minX, cx); - minY = Math.min(minY, cy); - maxX = Math.max(maxX, cx); - maxY = Math.max(maxY, cy); - - stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); - } - - const w = maxX - minX + 1; - const h = maxY - minY + 1; - if (w >= 20 && h >= 20) { - regions.push({x: minX, y: minY, w, h, pixels: pixelCount}); - } - } - } - } - - console.log(`找到 ${regions.length} 个连通区域(>= 20x20)`); - - // 按面积排序并显示前10个 - regions.sort((a, b) => (b.w * b.h) - (a.w * a.h)); - - console.log('\n前10个最大区域:'); - for (let i = 0; i < Math.min(10, regions.length); i++) { - const r = regions[i]; - const aspectRatio = r.w / r.h; - const density = r.pixels / (r.w * r.h); - console.log(` ${i + 1}. 
[x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`); - } -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - - const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')).slice(0, 3); - - for (const file of files) { - await debugRegions(path.join(doubanDir, file)); - } -} - -main().catch(console.error); diff --git a/src/debug-results.ts b/src/debug-results.ts deleted file mode 100644 index c7b46f7..0000000 --- a/src/debug-results.ts +++ /dev/null @@ -1,118 +0,0 @@ -import * as fs from 'fs'; -import * as path from 'path'; -import { SliderValidator, BoundingBox as ValidatorBox } from './validator'; -import { SliderDetector } from './detector'; - -async function main() { - const detector = new SliderDetector(); - const validator = new SliderValidator(); - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); - - console.log('=== 详细调试检测结果 ===\n'); - - const files = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); - - for (const file of files) { - const imagePath = path.join(doubanDir, file); - const targetPath = path.join(doubanTargetDir, file); - - if (!fs.existsSync(imagePath)) continue; - - console.log(`\n【${file}】`); - console.log('─'.repeat(60)); - - // 获取标准答案 - const targetBoxes = await validator.extractRedBoxes(targetPath); - console.log(`标准答案(${targetBoxes.length}个):`); - targetBoxes.forEach((box, i) => { - console.log(` 目标${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); - }); - - // 获取检测结果 - const detected = await detector.detectSlider(imagePath, undefined, true); - const detectedBoxes = detected ? (Array.isArray(detected) ? 
detected : [detected]) : []; - - console.log(`\n检测结果(${detectedBoxes.length}个):`); - detectedBoxes.forEach((box, i) => { - console.log(` 检测${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); - }); - - // 详细匹配分析 - console.log(`\n匹配分析(容差10px):`); - - const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({ - x: b.x, - y: b.y, - width: b.width, - height: b.height - })); - - const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10); - - // 显示每个匹配对 - if (result.matches.length > 0) { - console.log(` 成功匹配 ${result.matches.length} 对:`); - result.matches.forEach((match, i) => { - const det = match.detected; - const tgt = match.target; - const detCenter = [det.x + det.width/2, det.y + det.height/2]; - const tgtCenter = [tgt.x + tgt.width/2, tgt.y + tgt.height/2]; - const distance = Math.sqrt( - Math.pow(detCenter[0] - tgtCenter[0], 2) + - Math.pow(detCenter[1] - tgtCenter[1], 2) - ); - console.log(` 配对${i + 1}: IoU=${match.iou.toFixed(3)}, 中心距离=${distance.toFixed(1)}px`); - console.log(` 检测框: (${det.x}, ${det.y}) ${det.width}×${det.height}`); - console.log(` 目标框: (${tgt.x}, ${tgt.y}) ${tgt.width}×${tgt.height}`); - }); - } - - // 显示漏检的目标 - if (result.matchedCount < result.totalTargets) { - const matched = result.matches.map(m => m.target); - const unmatched = targetBoxes.filter(t => - !matched.some(m => m.x === t.x && m.y === t.y && m.width === t.width && m.height === t.height) - ); - console.log(` ⚠️ 漏检 ${unmatched.length} 个目标:`); - unmatched.forEach((box, i) => { - console.log(` 目标${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); - - // 找最接近的检测框 - if (detectedValidatorBoxes.length > 0) { - let minDist = Infinity; - let closest = null; - for (const det of detectedValidatorBoxes) { - const detCenter = [det.x + det.width/2, det.y + det.height/2]; - const tgtCenter = [box.x + box.width/2, 
box.y + box.height/2]; - const dist = Math.sqrt( - Math.pow(detCenter[0] - tgtCenter[0], 2) + - Math.pow(detCenter[1] - tgtCenter[1], 2) - ); - if (dist < minDist) { - minDist = dist; - closest = det; - } - } - if (closest) { - console.log(` 最接近检测框: (${closest.x}, ${closest.y}) ${closest.width}×${closest.height}, 距离=${minDist.toFixed(1)}px`); - } - } - }); - } - - // 显示误检 - if (result.unmatched.length > 0) { - console.log(` ⚠️ 误检 ${result.unmatched.length} 个:`); - result.unmatched.forEach((box, i) => { - console.log(` 误检${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); - }); - } - - console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%`); - console.log(` 召回率: ${(result.recall * 100).toFixed(1)}%`); - } -} - -main().catch(console.error); diff --git a/src/debug-single.ts b/src/debug-single.ts deleted file mode 100644 index c01af50..0000000 --- a/src/debug-single.ts +++ /dev/null @@ -1,102 +0,0 @@ -import sharp from 'sharp'; -import * as path from 'path'; - -async function debugImage() { - const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块.png'); - - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - console.log(`图片尺寸: ${width}×${height}`); - console.log('\n=== 测试不同阈值 ===\n'); - - // 测试不同的暗色阈值 - for (const threshold of [60, 80, 100, 120, 140, 160, 180, 200]) { - let darkCount = 0; - const regions: Array<{x: number, y: number, count: number}> = []; - - // 粗略统计 - for (let y = 0; y < height; y += 10) { - for (let x = 0; x < width; x += 10) { - let localDark = 0; - for (let dy = 0; dy < 10 && y + dy < height; dy++) { - for (let dx = 0; dx < 10 && x + dx < width; dx++) { - const idx = ((y + dy) * width + (x + dx)) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = r * 0.299 + g * 0.587 + b * 0.114; - - if (brightness < threshold) 
{ - darkCount++; - localDark++; - } - } - } - - if (localDark > 50) { - regions.push({x, y, count: localDark}); - } - } - } - - const darkRatio = (darkCount / (width * height / 100) * 100).toFixed(1); - console.log(`阈值 < ${threshold}: 暗像素比例 ${darkRatio}%, 暗色区域数: ${regions.length}`); - - if (regions.length > 0 && regions.length < 10) { - regions.sort((a, b) => b.count - a.count); - console.log(` 主要暗色区域:`); - regions.slice(0, 3).forEach((r, i) => { - console.log(` 区域${i + 1}: (${r.x}, ${r.y}), 密度: ${r.count}`); - }); - } - } - - console.log('\n=== 测试白色阈值 ===\n'); - - // 测试白色阈值 - for (const threshold of [130, 150, 170, 190, 210]) { - let whiteCount = 0; - const regions: Array<{x: number, y: number, count: number}> = []; - - for (let y = 0; y < height; y += 10) { - for (let x = 0; x < width; x += 10) { - let localWhite = 0; - for (let dy = 0; dy < 10 && y + dy < height; dy++) { - for (let dx = 0; dx < 10 && x + dx < width; dx++) { - const idx = ((y + dy) * width + (x + dx)) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = r * 0.299 + g * 0.587 + b * 0.114; - - if (brightness > threshold && Math.abs(r - g) < 60 && Math.abs(g - b) < 60) { - whiteCount++; - localWhite++; - } - } - } - - if (localWhite > 50) { - regions.push({x, y, count: localWhite}); - } - } - } - - const whiteRatio = (whiteCount / (width * height / 100) * 100).toFixed(1); - console.log(`阈值 > ${threshold}: 白像素比例 ${whiteRatio}%, 白色区域数: ${regions.length}`); - - if (regions.length > 0 && regions.length < 15) { - regions.sort((a, b) => b.count - a.count); - console.log(` 主要白色区域:`); - regions.slice(0, 5).forEach((r, i) => { - console.log(` 区域${i + 1}: (${r.x}, ${r.y}), 密度: ${r.count}`); - }); - } - } -} - -debugImage().catch(console.error); diff --git a/src/debug-threshold.ts b/src/debug-threshold.ts deleted file mode 100644 index 7438915..0000000 --- a/src/debug-threshold.ts +++ /dev/null @@ -1,50 +0,0 @@ -import sharp from 'sharp'; -import * as 
path from 'path'; - -async function debugImage(imagePath: string) { - const basename = path.basename(imagePath); - console.log(`\n=== 调试: ${basename} ===`); - - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 使用不同的阈值测试 - for (const threshold of [70, 80, 90, 100, 110]) { - const darkMap = new Uint8Array(width * height); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = (r * 0.299 + g * 0.587 + b * 0.114); - - darkMap[y * width + x] = brightness < threshold ? 1 : 0; - } - } - - // 简单统计暗像素数量 - let darkCount = 0; - for (let i = 0; i < darkMap.length; i++) { - if (darkMap[i] === 1) darkCount++; - } - - console.log(`阈值 ${threshold}: 暗像素 ${darkCount} (${(darkCount / darkMap.length * 100).toFixed(1)}%)`); - } -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - - await debugImage(path.join(doubanDir, '滑块-2.png')); - await debugImage(path.join(doubanDir, '滑块-3.png')); - await debugImage(path.join(doubanDir, '滑块.png')); - await debugImage(path.join(doubanDir, '滑块-6.png')); -} - -main().catch(console.error); diff --git a/src/debug.ts b/src/debug.ts deleted file mode 100644 index 22a6298..0000000 --- a/src/debug.ts +++ /dev/null @@ -1,82 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -async function analyzeImage(imagePath: string) { - console.log(`\n分析图片: ${path.basename(imagePath)}`); - - const metadata = await sharp(imagePath).metadata(); - console.log(`尺寸: ${metadata.width}x${metadata.height}`); - console.log(`通道数: ${metadata.channels}`); - console.log(`颜色空间: ${metadata.space}`); - - // 分析颜色分布 - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ 
resolveWithObject: true }); - - const { width, height, channels } = info; - - // 统计不同颜色区域 - let darkPixels = 0; - let brightPixels = 0; - let colorfulPixels = 0; - - for (let i = 0; i < data.length; i += channels) { - const r = data[i]; - const g = data[i + 1]; - const b = data[i + 2]; - const avg = (r + g + b) / 3; - - if (avg < 50) darkPixels++; - else if (avg > 200) brightPixels++; - - const colorVariance = Math.abs(r - g) + Math.abs(g - b) + Math.abs(b - r); - if (colorVariance > 30) colorfulPixels++; - } - - const totalPixels = (data.length / channels); - console.log(`暗像素: ${(darkPixels / totalPixels * 100).toFixed(1)}%`); - console.log(`亮像素: ${(brightPixels / totalPixels * 100).toFixed(1)}%`); - console.log(`彩色像素: ${(colorfulPixels / totalPixels * 100).toFixed(1)}%`); - - // 生成调试图像 - 边缘检测结果 - const debugDir = path.join(path.dirname(imagePath), '..', 'debug'); - if (!fs.existsSync(debugDir)) { - fs.mkdirSync(debugDir, { recursive: true }); - } - - const basename = path.basename(imagePath, '.png'); - - // 保存灰度图 - await sharp(imagePath) - .greyscale() - .toFile(path.join(debugDir, `${basename}_gray.png`)); - - // 保存边缘检测结果 - await sharp(imagePath) - .greyscale() - .normalize() - .convolve({ - width: 3, - height: 3, - kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1] - }) - .toFile(path.join(debugDir, `${basename}_edge.png`)); - - console.log(`调试图像已保存到: ${debugDir}`); -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - - // 分析第一张图片 - const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')).slice(0, 3); - - for (const file of files) { - await analyzeImage(path.join(doubanDir, file)); - } -} - -main().catch(console.error); diff --git a/src/detection/candidate-search.ts b/src/detection/candidate-search.ts new file mode 100644 index 0000000..5e577af --- /dev/null +++ b/src/detection/candidate-search.ts @@ -0,0 +1,705 @@ +import sharp from 'sharp'; +import { BoundingBox, 
RawImage } from '../types'; +import { calculateIoU } from '../utils/geometry'; +import { createEdgeMap, morphologyClose, dilate, toGrayscale } from '../utils/image'; + +interface CandidateSearchInput { + original: RawImage; + normalized: RawImage; + quantizationSource: sharp.Sharp; +} + +export async function findCandidateBoxes({ + original, + normalized, + quantizationSource, +}: CandidateSearchInput): Promise { + const { width, height, channels } = normalized; + + const mixedBoxes = detectDarkRegions(normalized.data, width, height, channels); + const edgeBoxes = detectByEdges(normalized.data, width, height, channels); + const quantizedBoxes = await detectByColorQuantization( + quantizationSource, + width, + height, + channels + ); + const labBoxes = detectByLabColor(original.data, width, height, channels); + + const allBoxes = [...mixedBoxes, ...edgeBoxes, ...quantizedBoxes, ...labBoxes]; + const uniqueBoxes: BoundingBox[] = []; + allBoxes + .sort( + (a, b) => + b.score / (b.width * b.height) - a.score / (a.width * a.height) + ) + .forEach((box) => { + if (!uniqueBoxes.some((ub) => calculateIoU(ub, box) > 0.5)) { + uniqueBoxes.push(box); + } + }); + + const edgeMap = createEdgeMap(original); + + const scoredBoxes = uniqueBoxes + .map((box) => + scoreCandidate(box, original, normalized, edgeMap) + ) + .filter((box) => { + const aspectRatio = box.width / box.height; + const marginX = width * 0.05; + const marginY = height * 0.05; + + const isNotOnEdge = + box.x > marginX && + box.y > marginY && + box.x + box.width < width - marginX && + box.y + box.height < height - marginY; + + return ( + box.width >= 60 && + box.width <= 120 && + box.height >= 60 && + box.height <= 120 && + aspectRatio >= 0.7 && + aspectRatio <= 1.3 && + isNotOnEdge + ); + }) + .sort((a, b) => b.score - a.score); + + return scoredBoxes; +} + +function scoreCandidate( + box: BoundingBox, + original: RawImage, + normalized: RawImage, + edgeMap: Uint8Array +): BoundingBox { + const aspectRatio = 
box.width / box.height; + const isSquare = aspectRatio >= 0.85 && aspectRatio <= 1.18; + const isConsistent = verifyHueConsistency(original, box); + const internalEdgeDensity = calculateInternalEdgeDensity( + edgeMap, + normalized.width, + box + ); + const gradientScore = calculateEdgeGradientScore(original, box); + + let score = box.score / (box.width * box.height); + if (isSquare) score += 0.5; + if (isConsistent) score += 0.8; + if (internalEdgeDensity < 0.15) score += 0.8; + if (internalEdgeDensity < 0.1) score += 0.6; + score += gradientScore * 2.0; + + return { ...box, score }; +} + +function verifyHueConsistency(image: RawImage, box: BoundingBox): boolean { + const hueValues: number[] = []; + const saturationValues: number[] = []; + + const inset = 5; + const startY = box.y + inset; + const endY = box.y + box.height - inset; + const startX = box.x + inset; + const endX = box.x + box.width - inset; + + if (endY <= startY || endX <= startX) return true; + + const { data, width, channels } = image; + + for (let y = startY; y < endY; y++) { + for (let x = startX; x < endX; x++) { + const idx = (y * width + x) * channels; + const r = data[idx] / 255; + const g = data[idx + 1] / 255; + const b = data[idx + 2] / 255; + + const max = Math.max(r, g, b); + const min = Math.min(r, g, b); + let h = 0; + let s = 0; + const l = (max + min) / 2; + + if (max !== min) { + const d = max - min; + s = l > 0.5 ? d / (2 - max - min) : d / (max + min); + switch (max) { + case r: + h = (g - b) / d + (g < b ? 6 : 0); + break; + case g: + h = (b - r) / d + 2; + break; + case b: + h = (r - g) / d + 4; + break; + } + h /= 6; + } + + if (s > 0.15 && l > 0.1 && l < 0.9) { + hueValues.push(h * 360); + saturationValues.push(s); + } + } + } + + const coloredPixels = hueValues.length; + const internalArea = (box.width - 2 * inset) * (box.height - 2 * inset); + + if (coloredPixels < internalArea * 0.2) { + return true; + } + + const normalizeHue = (h: number) => (h > 180 ? 
h - 360 : h); + const normalizedHues = hueValues.map(normalizeHue); + const meanHue = + normalizedHues.reduce((a, b) => a + b, 0) / normalizedHues.length; + const stdDevHue = Math.sqrt( + normalizedHues + .map((h) => Math.pow(h - meanHue, 2)) + .reduce((a, b) => a + b, 0) / normalizedHues.length + ); + + return stdDevHue < 25; +} + +function calculateInternalEdgeDensity( + edgeMap: Uint8Array, + width: number, + box: BoundingBox +): number { + let edgePixels = 0; + const shrink = 5; + + const startX = box.x + shrink; + const startY = box.y + shrink; + const endX = box.x + box.width - shrink; + const endY = box.y + box.height - shrink; + + if (endX <= startX || endY <= startY) return 0; + + for (let y = startY; y < endY; y++) { + for (let x = startX; x < endX; x++) { + if (edgeMap[y * width + x] === 1) { + edgePixels++; + } + } + } + + const area = (endX - startX) * (endY - startY); + return area === 0 ? 0 : edgePixels / area; +} + +function calculateEdgeGradientScore(image: RawImage, box: BoundingBox): number { + const gradients: number[] = []; + const band = 5; + const { data, width, height, channels } = image; + + const sampleLine = ( + x1: number, + y1: number, + x2: number, + y2: number + ) => { + const dx = x2 - x1; + const dy = y2 - y1; + const steps = Math.max(Math.abs(dx), Math.abs(dy)); + if (steps === 0) return; + + let lastBrightness = -1; + + for (let i = 0; i <= steps; i++) { + const x = Math.round(x1 + (dx * i) / steps); + const y = Math.round(y1 + (dy * i) / steps); + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = (y * width + x) * channels; + const brightness = + data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114; + + if (lastBrightness !== -1) { + gradients.push(Math.abs(brightness - lastBrightness)); + } + lastBrightness = brightness; + } + }; + + sampleLine(box.x, box.y - band, box.x + box.width, box.y - band); + sampleLine( + box.x, + box.y + box.height + band, + box.x + box.width, + box.y + 
box.height + band + ); + sampleLine(box.x - band, box.y, box.x - band, box.y + box.height); + sampleLine( + box.x + box.width + band, + box.y, + box.x + box.width + band, + box.y + box.height + ); + + if (gradients.length < 20) { + return 0.5; + } + + const mean = gradients.reduce((a, b) => a + b, 0) / gradients.length; + const variance = + gradients.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / + gradients.length; + + return Math.exp(-variance / 100); +} + +function detectDarkRegions( + data: Buffer, + width: number, + height: number, + channels: number +): BoundingBox[] { + const allCandidates: BoundingBox[] = []; + + for (const brightThreshold of [130, 160, 190, 220]) { + const whiteMap = new Uint8Array(width * height); + for (let i = 0; i < data.length; i += channels) { + const brightness = + data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114; + whiteMap[i / channels] = brightness > brightThreshold ? 1 : 0; + } + const dilatedMap = dilate(whiteMap, width, height, 5); + const regions = findDarkRegionsList(dilatedMap, width, height); + allCandidates.push( + ...selectBestRegions(regions, width, height, true) + ); + } + + for (const darkThreshold of [40, 60, 80, 100, 120]) { + const darkMap = new Uint8Array(width * height); + for (let i = 0; i < data.length; i += channels) { + const brightness = + data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114; + darkMap[i / channels] = brightness < darkThreshold ? 
1 : 0; + } + const cleaned = morphologyClose(darkMap, width, height, 3); + const regions = findDarkRegionsList(cleaned, width, height); + allCandidates.push( + ...selectBestRegions(regions, width, height, true) + ); + } + + if (allCandidates.length === 0) return []; + + const uniqueCandidates: BoundingBox[] = []; + allCandidates.sort((a, b) => b.score - a.score).forEach((candidate) => { + if (!uniqueCandidates.some((s) => calculateIoU(s, candidate) > 0.4)) { + uniqueCandidates.push(candidate); + } + }); + + return uniqueCandidates; +} + +function findDarkRegionsList( + binary: Uint8Array, + width: number, + height: number +): BoundingBox[] { + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0 && binary[idx] === 1) { + const region = floodFill(binary, visited, x, y, width, height); + if (region.width >= 20 && region.height >= 20) { + regions.push(region); + } + } + } + } + + return regions; +} + +function selectBestRegions( + regions: BoundingBox[], + imageWidth: number, + imageHeight: number, + selectMultiple: boolean = false +): BoundingBox[] { + if (regions.length === 0) return []; + + const validRegions = regions.filter( + (region) => + region.width < imageWidth * 0.5 && region.height < imageHeight * 0.5 + ); + + const candidates = validRegions.filter((region) => { + const aspectRatio = region.width / region.height; + const centerY = region.y + region.height / 2; + const sizeDiff = Math.abs(region.width - region.height); + + return ( + region.width >= 70 && + region.width <= 110 && + region.height >= 70 && + region.height <= 110 && + aspectRatio >= 0.85 && + aspectRatio <= 1.18 && + sizeDiff <= 20 && + centerY > imageHeight * 0.1 && + centerY < imageHeight * 0.8 + ); + }); + + if (candidates.length === 0) return []; + + candidates.sort((a, b) => { + const densityA = a.score / (a.width * a.height); + 
const densityB = b.score / (b.width * b.height); + const aspectScoreA = Math.abs(a.width / a.height - 1); + const aspectScoreB = Math.abs(b.width / b.height - 1); + return densityB * 3 - aspectScoreB - (densityA * 3 - aspectScoreA); + }); + + const selected: BoundingBox[] = []; + for (const candidate of candidates) { + const overlaps = selected.some( + (s) => calculateIoU(s, candidate) > 0.3 + ); + if (!overlaps) { + selected.push(candidate); + if (!selectMultiple && selected.length >= 1) break; + if (selectMultiple && selected.length >= 3) break; + } + } + + return selected; +} + +function detectByEdges( + data: Buffer, + width: number, + height: number, + channels: number +): BoundingBox[] { + const gray = toGrayscale(data, width, height, channels); + const edges = new Uint8Array(width * height); + + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const idx = y * width + x; + const gx = + -gray[(y - 1) * width + (x - 1)] + + gray[(y - 1) * width + (x + 1)] - + 2 * gray[idx - 1] + + 2 * gray[idx + 1] - + gray[(y + 1) * width + (x - 1)] + + gray[(y + 1) * width + (x + 1)]; + + const gy = + -gray[(y - 1) * width + (x - 1)] - + 2 * gray[(y - 1) * width + x] - + gray[(y - 1) * width + (x + 1)] + + gray[(y + 1) * width + (x - 1)] + + 2 * gray[(y + 1) * width + x] + + gray[(y + 1) * width + (x + 1)]; + + const magnitude = Math.sqrt(gx * gx + gy * gy); + edges[idx] = magnitude > 40 ? 1 : 0; + } + } + + const dilatedMap = dilate(edges, width, height, 4); + const regions = findDarkRegionsList(dilatedMap, width, height); + return selectBestRegions(regions, width, height, true); +} + +async function detectByColorQuantization( + image: sharp.Sharp, + width: number, + height: number, + channels: number +): Promise { + try { + const smoothed = await image + .clone() + .median(3) + .ensureAlpha() + .raw() + .toBuffer({ resolveWithObject: true }); + + const { data: smoothData, info } = smoothed; + const channelCount = info.channels ?? 
channels; + const quantized = Buffer.from(smoothData); + + const palette = [ + [240, 240, 240], + [200, 200, 200], + [150, 150, 150], + [100, 100, 100], + [60, 60, 60], + [30, 30, 30], + [0, 0, 0], + ]; + + for (let i = 0; i < quantized.length; i += channelCount) { + const r = quantized[i]; + const g = quantized[i + 1]; + const b = quantized[i + 2]; + let minDist = Infinity; + let closest = 0; + + for (let p = 0; p < palette.length; p++) { + const [pr, pg, pb] = palette[p]; + const dist = Math.pow(r - pr, 2) + Math.pow(g - pg, 2) + Math.pow(b - pb, 2); + if (dist < minDist) { + minDist = dist; + closest = p; + } + } + + const [qr, qg, qb] = palette[closest]; + quantized[i] = qr; + quantized[i + 1] = qg; + quantized[i + 2] = qb; + } + + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0) { + const region = floodFillOnQuantized( + quantized, + visited, + x, + y, + width, + height, + channelCount + ); + + if ( + region.width >= 40 && + region.width <= 140 && + region.height >= 40 && + region.height <= 140 + ) { + const aspectRatio = region.width / region.height; + if (aspectRatio >= 0.7 && aspectRatio <= 1.4) { + regions.push(region); + } + } + } + } + } + + return selectBestRegions(regions, width, height, true); + } catch (error) { + console.error('[Quantization] Failed to quantize image:', error); + return []; + } +} + +function detectByLabColor( + data: Buffer, + width: number, + height: number, + channels: number +): BoundingBox[] { + const labMap = new Float32Array(width * height * 3); + for (let i = 0; i < width * height; i++) { + const idx = i * channels; + const [l, a, b] = rgbToLab(data[idx], data[idx + 1], data[idx + 2]); + labMap[i * 3] = l; + labMap[i * 3 + 1] = a; + labMap[i * 3 + 2] = b; + } + + const diffMap = new Uint8Array(width * height); + const neighborhood = 8; + for (let y = 
neighborhood; y < height - neighborhood; y++) { + for (let x = neighborhood; x < width - neighborhood; x++) { + const centerIdx = y * width + x; + let maxDiff = 0; + for (let ny = -neighborhood; ny <= neighborhood; ny += neighborhood) { + for (let nx = -neighborhood; nx <= neighborhood; nx += neighborhood) { + if (nx === 0 && ny === 0) continue; + const neighborIdx = (y + ny) * width + (x + nx); + const deltaE = Math.sqrt( + Math.pow(labMap[centerIdx * 3] - labMap[neighborIdx * 3], 2) + + Math.pow(labMap[centerIdx * 3 + 1] - labMap[neighborIdx * 3 + 1], 2) + + Math.pow(labMap[centerIdx * 3 + 2] - labMap[neighborIdx * 3 + 2], 2) + ); + if (deltaE > maxDiff) { + maxDiff = deltaE; + } + } + } + if (maxDiff > 12) { + diffMap[centerIdx] = 1; + } + } + } + + const cleaned = morphologyClose(diffMap, width, height, 5); + const regions = findDarkRegionsList(cleaned, width, height); + return selectBestRegions(regions, width, height, true); +} + +function rgbToLab(r: number, g: number, b: number): [number, number, number] { + let R = r / 255; + let G = g / 255; + let B = b / 255; + R = R > 0.04045 ? Math.pow((R + 0.055) / 1.055, 2.4) : R / 12.92; + G = G > 0.04045 ? Math.pow((G + 0.055) / 1.055, 2.4) : G / 12.92; + B = B > 0.04045 ? Math.pow((B + 0.055) / 1.055, 2.4) : B / 12.92; + + const X = R * 0.4124 + G * 0.3576 + B * 0.1805; + const Y = R * 0.2126 + G * 0.7152 + B * 0.0722; + const Z = R * 0.0193 + G * 0.1192 + B * 0.9505; + + let x = X / 0.95047; + let y = Y / 1.0; + let z = Z / 1.08883; + + x = x > 0.008856 ? Math.pow(x, 1 / 3) : 7.787 * x + 16 / 116; + y = y > 0.008856 ? Math.pow(y, 1 / 3) : 7.787 * y + 16 / 116; + z = z > 0.008856 ? 
Math.pow(z, 1 / 3) : 7.787 * z + 16 / 116; + + const L = 116 * y - 16; + const a = 500 * (x - y); + const bLab = 200 * (y - z); + + return [L, a, bLab]; +} + +function floodFillOnQuantized( + data: Buffer, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number, + channels: number +): BoundingBox { + const startIdx = (startY * width + startX) * channels; + const targetColor = [ + data[startIdx], + data[startIdx + 1], + data[startIdx + 2], + ]; + + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + let pixelCount = 0; + const stack: Array<[number, number]> = [[startX, startY]]; + + visited[startY * width + startX] = 1; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + pixelCount++; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + const neighbors: Array<[number, number]> = [ + [x + 1, y], + [x - 1, y], + [x, y + 1], + [x, y - 1], + ]; + + for (const [nx, ny] of neighbors) { + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + const nIdx = ny * width + nx; + if (visited[nIdx] === 0) { + const baseIdx = nIdx * channels; + const neighborColor = [ + data[baseIdx], + data[baseIdx + 1], + data[baseIdx + 2], + ]; + if ( + neighborColor[0] === targetColor[0] && + neighborColor[1] === targetColor[1] && + neighborColor[2] === targetColor[2] + ) { + visited[nIdx] = 1; + stack.push([nx, ny]); + } + } + } + } + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: pixelCount, + }; +} + +function floodFill( + binary: Uint8Array, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number +): BoundingBox { + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + let pixelCount = 0; + + const stack: Array<[number, number]> = [[startX, startY]]; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + + if (x < 0 || 
x >= width || y < 0 || y >= height) continue; + + const idx = y * width + x; + if (visited[idx] === 1 || binary[idx] === 0) continue; + + visited[idx] = 1; + pixelCount++; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + stack.push([x + 1, y]); + stack.push([x - 1, y]); + stack.push([x, y + 1]); + stack.push([x, y - 1]); + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: pixelCount, + }; +} diff --git a/src/detector-cv.ts b/src/detector-cv.ts deleted file mode 100644 index e69de29..0000000 diff --git a/src/detector-edge.ts b/src/detector-edge.ts deleted file mode 100644 index 75dafe2..0000000 --- a/src/detector-edge.ts +++ /dev/null @@ -1,413 +0,0 @@ -import sharp from 'sharp'; - -export interface BoundingBox { - x: number; - y: number; - width: number; - height: number; - score: number; -} - -interface DetectOptions { - downscaleWidth?: number; - expectedWidth?: number; - expectedHeight?: number; - widthTolerance?: number; - heightTolerance?: number; - maxCandidates?: number; -} - -const DEFAULT_EXPECTED_WIDTH = 470; -const DEFAULT_EXPECTED_HEIGHT = 110; -const DEFAULT_TOLERANCE = 0.35; -const DEFAULT_MAX_CANDIDATES = 6; -const CLAMP_EPSILON = 1e-6; - -/** - * 基于梯度能量的滑块检测器,针对豆瓣滑块的长条形状做了定制优化。 - * 算法要点: - * 1. 对整图按固定宽度缩放,保证不同图像的尺度一致。 - * 2. 使用垂直梯度能量(行差分)定位滑块上、下边缘。 - * 3. 在候选上下边界之间利用水平梯度能量(列差分)寻找左右边缘。 - * 4. 
结合期望宽高与对比度评分筛选最优候选。 - */ -export class EdgeSliderDetector { - async detectSlider( - imagePath: string, - outputPath?: string, - detectMultiple: boolean = false, - options: DetectOptions = {} - ): Promise { - const { - downscaleWidth = 512, - expectedWidth = DEFAULT_EXPECTED_WIDTH, - expectedHeight = DEFAULT_EXPECTED_HEIGHT, - widthTolerance = DEFAULT_TOLERANCE, - heightTolerance = DEFAULT_TOLERANCE, - maxCandidates = DEFAULT_MAX_CANDIDATES, - } = options; - - const metadata = await sharp(imagePath).metadata(); - if (!metadata.width || !metadata.height) { - throw new Error(`无法读取图片尺寸: ${imagePath}`); - } - - const scale = - metadata.width > downscaleWidth ? downscaleWidth / metadata.width : 1; - const resized = await sharp(imagePath) - .resize({ width: Math.max(1, Math.round(metadata.width * scale)) }) - .greyscale() - .raw() - .toBuffer({ resolveWithObject: true }); - - const { data, info } = resized; - const scaledWidth = info.width; - const scaledHeight = info.height; - - const rowEnergy = this.computeRowGradient(data, scaledWidth, scaledHeight); - const smoothRow = this.smooth(rowEnergy, 9); - - const verticalBand = this.locateVerticalBand( - data, - smoothRow, - scaledWidth, - scaledHeight, - expectedHeight * scale, - heightTolerance, - maxCandidates - ); - - if (!verticalBand) { - return detectMultiple ? [] : null; - } - - const colEnergy = this.computeColumnGradient( - data, - scaledWidth, - scaledHeight, - verticalBand.top, - verticalBand.bottom - ); - const smoothCol = this.smooth(colEnergy, 9); - - const horizontalSpan = this.locateHorizontalSpan( - data, - smoothCol, - scaledWidth, - verticalBand, - expectedWidth * scale, - widthTolerance, - maxCandidates - ); - - if (!horizontalSpan) { - return detectMultiple ? 
[] : null; - } - - const scaledBox: BoundingBox = { - x: horizontalSpan.left, - y: verticalBand.top, - width: horizontalSpan.right - horizontalSpan.left + 1, - height: verticalBand.bottom - verticalBand.top + 1, - score: verticalBand.score + horizontalSpan.score, - }; - - const box = this.toOriginalBox(scaledBox, scale, metadata.width, metadata.height); - - if (outputPath) { - await this.drawBoxes(imagePath, [box], outputPath); - } - - if (detectMultiple) { - return [box]; - } - return box; - } - - private computeRowGradient(data: Buffer, width: number, height: number): Float32Array { - const grad = new Float32Array(height); - for (let y = 0; y < height - 1; y += 1) { - let sum = 0; - const row = y * width; - const nextRow = (y + 1) * width; - for (let x = 0; x < width; x += 1) { - sum += Math.abs(data[nextRow + x] - data[row + x]); - } - grad[y] = sum / (width + CLAMP_EPSILON); - } - return grad; - } - - private computeColumnGradient( - data: Buffer, - width: number, - height: number, - top: number, - bottom: number - ): Float32Array { - const grad = new Float32Array(width); - const bandHeight = Math.max(1, bottom - top + 1); - for (let x = 0; x < width - 1; x += 1) { - let sum = 0; - for (let y = top; y <= bottom; y += 1) { - const idx = y * width + x; - sum += Math.abs(data[idx + 1] - data[idx]); - } - grad[x] = sum / (bandHeight + CLAMP_EPSILON); - } - return grad; - } - - private smooth(values: Float32Array, window: number): Float32Array { - if (window <= 1) return Float32Array.from(values); - const result = new Float32Array(values.length); - const radius = Math.max(1, Math.floor(window / 2)); - for (let i = 0; i < values.length; i += 1) { - let sum = 0; - let count = 0; - for (let offset = -radius; offset <= radius; offset += 1) { - const idx = i + offset; - if (idx >= 0 && idx < values.length) { - sum += values[idx]; - count += 1; - } - } - result[i] = count > 0 ? 
sum / count : values[i]; - } - return result; - } - - private locateVerticalBand( - data: Buffer, - rowEnergy: Float32Array, - width: number, - height: number, - expectedHeight: number, - tolerance: number, - maxCandidates: number - ): { top: number; bottom: number; score: number } | null { - const searchStart = Math.floor(height * 0.15); - const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95)); - const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance))); - const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance))); - - const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates); - if (topCandidates.length === 0) { - return null; - } - - let best: { top: number; bottom: number; score: number } | null = null; - - for (const top of topCandidates) { - const bottomStart = Math.min(height - 2, top + minHeight); - const bottomEnd = Math.min(height - 2, top + maxHeight); - if (bottomEnd <= bottomStart) continue; - - const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd); - const bandScore = this.bandContrast(data, width, height, top, bottom); - - const actualHeight = bottom - top; - const expectedPenalty = Math.abs(actualHeight - expectedHeight); - const heightScore = Math.max(0, 1 - expectedPenalty / (expectedHeight * tolerance + 1)); - const score = (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6; - - if (!best || score > best.score) { - best = { top: top, bottom: bottom, score }; - } - } - - return best; - } - - private locateHorizontalSpan( - data: Buffer, - colEnergy: Float32Array, - width: number, - band: { top: number; bottom: number }, - expectedWidth: number, - tolerance: number, - maxCandidates: number - ): { left: number; right: number; score: number } | null { - const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance))); - const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance))); 
- - const leftCandidates = this.topIndices(colEnergy, 2, width - 3, maxCandidates); - if (leftCandidates.length === 0) { - return null; - } - - let best: { left: number; right: number; score: number } | null = null; - - for (const left of leftCandidates) { - const rightStart = Math.min(width - 3, left + minWidth); - const rightEnd = Math.min(width - 3, left + maxWidth); - if (rightEnd <= rightStart) continue; - - const right = this.maxIndex(colEnergy, rightStart, rightEnd); - const actualWidth = right - left; - if (actualWidth < minWidth || actualWidth > maxWidth) { - continue; - } - - const contrastScore = this.bandContrastColumns(data, width, band.top, band.bottom, left, right); - const widthPenalty = Math.abs(actualWidth - expectedWidth); - const widthScore = Math.max(0, 1 - widthPenalty / (expectedWidth * tolerance + 1)); - const score = - (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4; - - if (!best || score > best.score) { - best = { left, right, score }; - } - } - - return best; - } - - private topIndices( - values: Float32Array, - start: number, - end: number, - maxCount: number - ): number[] { - const pairs: Array<{ index: number; value: number }> = []; - for (let i = start; i < end && i < values.length; i += 1) { - pairs.push({ index: i, value: values[i] }); - } - pairs.sort((a, b) => b.value - a.value); - return pairs.slice(0, maxCount).map(item => item.index); - } - - private maxIndex(values: Float32Array, start: number, end: number): number { - let bestIdx = start; - let bestVal = values[start]; - for (let i = start + 1; i <= end && i < values.length; i += 1) { - if (values[i] > bestVal) { - bestVal = values[i]; - bestIdx = i; - } - } - return bestIdx; - } - - private bandContrast(data: Buffer, width: number, height: number, top: number, bottom: number): number { - const innerMean = this.meanRows(data, width, height, top, bottom); - const topMean = this.meanRows(data, width, height, Math.max(0, top - 12), 
Math.max(0, top - 1)); - const bottomMean = this.meanRows( - data, - width, - height, - Math.min(height - 1, bottom + 1), - Math.min(height - 1, bottom + 12) - ); - const outsideMean = (topMean + bottomMean) / 2; - return Math.abs(innerMean - outsideMean); - } - - private bandContrastColumns( - data: Buffer, - width: number, - top: number, - bottom: number, - left: number, - right: number - ): number { - const height = Math.floor(data.length / width); - const innerMean = this.meanColumns(data, width, height, top, bottom, left, right); - const leftMean = this.meanColumns( - data, - width, - height, - top, - bottom, - Math.max(0, left - 20), - Math.max(left - 2, left - 1) - ); - const rightMean = this.meanColumns( - data, - width, - height, - top, - bottom, - Math.min(width - 1, right + 1), - Math.min(width - 1, right + 20) - ); - const outsideMean = (leftMean + rightMean) / 2; - return Math.abs(innerMean - outsideMean); - } - - private meanRows( - data: Buffer, - width: number, - height: number, - startRow: number, - endRow: number - ): number { - const s = Math.max(0, Math.min(startRow, height - 1)); - const e = Math.max(s, Math.min(endRow, height - 1)); - let sum = 0; - let count = 0; - for (let y = s; y <= e; y += 1) { - const rowOffset = y * width; - for (let x = 0; x < width; x += 1) { - sum += data[rowOffset + x]; - } - count += width; - } - return count > 0 ? 
sum / count : 0; - } - - private meanColumns( - data: Buffer, - width: number, - height: number, - top: number, - bottom: number, - startCol: number, - endCol: number - ): number { - const topClamped = Math.max(0, Math.min(top, height - 1)); - const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1)); - const s = Math.max(0, startCol); - const e = Math.max(s, Math.min(endCol, width - 1)); - let sum = 0; - let count = 0; - for (let x = s; x <= e; x += 1) { - for (let y = topClamped; y <= bottomClamped; y += 1) { - sum += data[y * width + x]; - } - count += bottomClamped - topClamped + 1; - } - return count > 0 ? sum / count : 0; - } - - private toOriginalBox(box: BoundingBox, scale: number, width: number, height: number): BoundingBox { - const inv = scale === 0 ? 1 : 1 / scale; - const x = Math.round(box.x * inv); - const y = Math.round(box.y * inv); - const w = Math.round(box.width * inv); - const h = Math.round(box.height * inv); - return { - x: Math.max(0, Math.min(x, width - 1)), - y: Math.max(0, Math.min(y, height - 1)), - width: Math.max(1, Math.min(w, width - x)), - height: Math.max(1, Math.min(h, height - y)), - score: box.score, - }; - } - - private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) { - const image = sharp(imagePath); - const metadata = await image.metadata(); - const svgBoxes = boxes - .map(box => { - return ``; - }) - .join('\n'); - - const svg = `${svgBoxes}`; - - await image - .composite([{ input: Buffer.from(svg), top: 0, left: 0 }]) - .toFile(outputPath); - } -} diff --git a/src/detector-self-learning.ts b/src/detector-self-learning.ts index a604c95..24a7104 100644 --- a/src/detector-self-learning.ts +++ b/src/detector-self-learning.ts @@ -1,7 +1,7 @@ import sharp from 'sharp'; import * as fs from 'fs'; import * as path from 'path'; -import { BoundingBox } from './detector'; +import { BoundingBox } from './types'; async function matchTemplate( image: sharp.Sharp, @@ -120,15 +120,21 @@ export 
class SelfLearningSliderDetector { const searchArea = { x: 0, - y: Math.max(0, seedBox.y - 20), + y: Math.max(0, seedBox.y - 25), // 显著放宽垂直搜索范围 width: imageWidth, - height: seedBox.height + 40, + height: seedBox.height + 50, // 显著放宽垂直搜索范围 }; const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge, searchArea, seedBox); - console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)}`); + console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)} at y=${maxLoc.y}`); - if (maxVal > 0.3) { // Increased threshold for higher confidence + // 验证第二个滑块是否在同一水平线上,放宽y轴偏差到25px + if (Math.abs(maxLoc.y - seedBox.y) > 25) { + console.log(` [SelfLearning] Discarded second slider candidate because it's not on the same horizontal line (y-delta: ${Math.abs(maxLoc.y - seedBox.y)}px).`); + return null; + } + + if (maxVal > 0.35) { // 使用一个相对宽松但合理的阈值 return { x: maxLoc.x, y: maxLoc.y, diff --git a/src/detector-template.ts b/src/detector-template.ts deleted file mode 100644 index bf10041..0000000 --- a/src/detector-template.ts +++ /dev/null @@ -1,134 +0,0 @@ -import sharp from 'sharp'; -import { BoundingBox } from './detector'; - -// TM_CCOEFF_NORMED: 归一化相关系数匹配。对于光照变化不敏感,效果较好。 -async function matchTemplate( - image: sharp.Sharp, - template: sharp.Sharp -): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> { - const { data: imageBuffer, info: imageInfo } = await image - .raw() - .toBuffer({ resolveWithObject: true }); - const { data: templateBuffer, info: templateInfo } = await template - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo; - const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo; - - if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) { - throw new Error('Image or template dimensions are invalid.'); - } - - let maxVal = -Infinity; - let 
maxLoc = { x: 0, y: 0 }; - - const resultWidth = imageWidth - templateWidth + 1; - const resultHeight = imageHeight - templateHeight + 1; - - for (let y = 0; y < resultHeight; y++) { - for (let x = 0; x < resultWidth; x++) { - let sumC = 0; - let sumT2 = 0; - let sumI2 = 0; - - for (let ty = 0; ty < templateHeight; ty++) { - for (let tx = 0; tx < templateWidth; tx++) { - const imageY = y + ty; - const imageX = x + tx; - - const imageIdx = (imageY * imageWidth + imageX) * imageChannels; - const templateIdx = (ty * templateWidth + tx) * templateChannels; - - // For Canny edge images, we only need one channel - const imageVal = imageBuffer[imageIdx]; - const templateVal = templateBuffer[templateIdx]; - - sumC += imageVal * templateVal; - sumT2 += templateVal * templateVal; - sumI2 += imageVal * imageVal; - } - } - - const denominator = Math.sqrt(sumT2 * sumI2); - const val = denominator === 0 ? 0 : sumC / denominator; - - if (val > maxVal) { - maxVal = val; - maxLoc = { x, y }; - } - } - } - - return { maxVal, maxLoc }; -} - -export class TemplateSliderDetector { - private async cannyEdge(image: sharp.Sharp): Promise { - // A simplified Canny implementation for template matching - return image - .grayscale() - .convolve({ // Gaussian blur - width: 5, - height: 5, - kernel: [ - 1, 4, 7, 4, 1, - 4, 16, 26, 16, 4, - 7, 26, 41, 26, 7, - 4, 16, 26, 16, 4, - 1, 4, 7, 4, 1, - ], - scale: 273, - }) - .raw() - .toBuffer({ resolveWithObject: true }) - .then(({ data, info }) => { - // Sobel edge detection (simplified) - const sobelData = Buffer.alloc(info.width * info.height); - for (let y = 1; y < info.height - 1; y++) { - for (let x = 1; x < info.width - 1; x++) { - const Gx = - -data[(y - 1) * info.width + x - 1] - 2 * data[y * info.width + x - 1] - data[(y + 1) * info.width + x - 1] + - data[(y - 1) * info.width + x + 1] + 2 * data[y * info.width + x + 1] + data[(y + 1) * info.width + x + 1]; - const Gy = - -data[(y - 1) * info.width + x - 1] - 2 * data[(y - 1) * info.width 
+ x] - data[(y - 1) * info.width + x + 1] + - data[(y + 1) * info.width + x - 1] + 2 * data[(y + 1) * info.width + x] + data[(y + 1) * info.width + x + 1]; - - const magnitude = Math.sqrt(Gx * Gx + Gy * Gy); - sobelData[y * info.width + x] = magnitude > 50 ? 255 : 0; // Threshold - } - } - return sharp(sobelData, { raw: { width: info.width, height: info.height, channels: 1 } }); - }); - } - - public async detect(imagePath: string, templatePath: string): Promise { - try { - const image = sharp(imagePath); - const template = sharp(templatePath); - - const imageEdge = await this.cannyEdge(image); - const templateEdge = await this.cannyEdge(template); - - const { width: templateWidth, height: templateHeight } = await template.metadata(); - - const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge); - - console.log(` Template: ${templatePath}, Score: ${maxVal.toFixed(4)} at (${maxLoc.x}, ${maxLoc.y})`); - - if (maxVal > 0.3) { // Correlation threshold - return { - x: maxLoc.x, - y: maxLoc.y, - width: templateWidth || 0, - height: templateHeight || 0, - score: maxVal, - }; - } - return null; - } catch (error) { - console.error(`Error during template matching for ${imagePath}:`, error); - return null; - } - } -} diff --git a/src/detector.ts b/src/detector.ts index a28837d..131b866 100644 --- a/src/detector.ts +++ b/src/detector.ts @@ -1,1092 +1,280 @@ import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; -import { SliderValidator, BoundingBox as ValidatorBox } from './validator'; +import { BoundingBox, RawImage } from './types'; +import { findCandidateBoxes } from './detection/candidate-search'; +import { calculateIoU } from './utils/geometry'; import { SelfLearningSliderDetector } from './detector-self-learning'; -export interface BoundingBox { - x: number; - y: number; - width: number; - height: number; - score: number; -} +type BoxColor = 'red' | 'blue' | 'green'; +const DEFAULT_DRAW_COLOR: BoxColor = 'blue'; export class 
SliderDetector { - /** - * 检测滑块位置 - 采用新的自学习流程 - * @param imagePath 图像路径 - * @param outputPath 输出路径 - * @param detectMultiple 是否检测多个滑块(在此实现中,总是尝试检测多个) - */ + private readonly selfLearning: SelfLearningSliderDetector; + + constructor(selfLearning?: SelfLearningSliderDetector) { + this.selfLearning = selfLearning ?? new SelfLearningSliderDetector(); + } + async detectSlider( imagePath: string, outputPath?: string, detectMultiple: boolean = true ): Promise { try { - const image = sharp(imagePath); - const initialCandidates = await this.findInitialCandidates(imagePath); + const baseImage = sharp(imagePath); - if (initialCandidates.length === 0) { - console.log(` [Detector] No initial candidates found for ${path.basename(imagePath)}.`); + const [originalRaw, normalizedRaw] = await Promise.all([ + baseImage + .clone() + .raw() + .toBuffer({ resolveWithObject: true }), + baseImage + .clone() + .normalize() + .raw() + .toBuffer({ resolveWithObject: true }), + ]); + + const original = toRawImage(originalRaw.data, originalRaw.info); + const normalized = toRawImage(normalizedRaw.data, normalizedRaw.info); + + const candidates = await findCandidateBoxes({ + original, + normalized, + quantizationSource: baseImage.clone(), + }); + + if (candidates.length === 0) { return null; } - // 选择分数最高的候选框作为种子 - const bestCandidate = initialCandidates.reduce((prev, current) => - (prev.score > current.score) ? 
prev : current - ); - - console.log(` [Detector] Best initial candidate for ${path.basename(imagePath)} at x: ${bestCandidate.x}, y: ${bestCandidate.y} (score: ${bestCandidate.score.toFixed(3)})`); + const seedBox = candidates[0]; - // 精炼最佳候选框 - const edgeImage = await this.cannyEdge(image); - const refinedSeedBox = await this.refineBox(bestCandidate, edgeImage); - console.log(` [Detector] Refined seed box to x: ${refinedSeedBox.x}, y: ${refinedSeedBox.y}, w: ${refinedSeedBox.width}, h: ${refinedSeedBox.height}`); + const edgeImage = await this.cannyEdge(baseImage.clone()); + const refinedSeed = await this.refineBox(seedBox, edgeImage); + const detections: BoundingBox[] = [refinedSeed]; - // 使用自学习检测器寻找第二个滑块 - const selfLearningDetector = new SelfLearningSliderDetector(); - const secondSlider = await selfLearningDetector.detectSecondSlider(imagePath, refinedSeedBox); - - const finalDetections = [refinedSeedBox]; - if (secondSlider) { - // 验证第二个滑块是否与第一个重叠过多 - const iou = this.calculateIoU(refinedSeedBox, secondSlider); - if (iou < 0.5) { - console.log(` [Detector] Found second slider at x: ${secondSlider.x}, y: ${secondSlider.y} with score ${secondSlider.score.toFixed(3)}`); - finalDetections.push(secondSlider); - } else { - console.log(` [Detector] Discarded second slider due to high overlap (IoU: ${iou.toFixed(3)}).`); + if (detectMultiple) { + const second = await this.selfLearning.detectSecondSlider( + imagePath, + refinedSeed + ); + if (second && calculateIoU(refinedSeed, second) < 0.5) { + detections.push(second); } - } else { - console.log(` [Detector] Self-learning detector did not find a second slider.`); } if (outputPath) { - await this.drawBoundingBoxes(imagePath, finalDetections, outputPath, 'blue'); + await this.drawBoundingBoxes(imagePath, detections, outputPath, DEFAULT_DRAW_COLOR); } - return finalDetections; + return detections; } catch (error) { - console.error(`Error in new detectSlider for ${imagePath}:`, error); + console.error(`Error detecting 
slider in ${imagePath}:`, error); return null; } } - /** - * Canny边缘检测 - */ + async annotate( + imagePath: string, + boxes: BoundingBox[], + outputPath: string, + color: BoxColor = DEFAULT_DRAW_COLOR + ): Promise { + await this.drawBoundingBoxes(imagePath, boxes, outputPath, color); + } + private async cannyEdge(image: sharp.Sharp): Promise { - return image + const { data, info } = await image + .clone() .grayscale() - .raw() - .toBuffer({ resolveWithObject: true }) - .then(({ data, info }) => { - const sobelData = Buffer.alloc(info.width * info.height); - for (let y = 1; y < info.height - 1; y++) { - for (let x = 1; x < info.width - 1; x++) { - const Gx = -data[(y - 1) * info.width + x - 1] - 2 * data[y * info.width + x - 1] - data[(y + 1) * info.width + x - 1] + data[(y - 1) * info.width + x + 1] + 2 * data[y * info.width + x + 1] + data[(y + 1) * info.width + x + 1]; - const Gy = -data[(y - 1) * info.width + x - 1] - 2 * data[(y - 1) * info.width + x] - data[(y - 1) * info.width + x + 1] + data[(y + 1) * info.width + x - 1] + 2 * data[(y + 1) * info.width + x] + data[(y + 1) * info.width + x + 1]; - const magnitude = Math.sqrt(Gx * Gx + Gy * Gy); - sobelData[y * info.width + x] = magnitude > 50 ? 
255 : 0; - } - } - return sharp(sobelData, { raw: { width: info.width, height: info.height, channels: 1 } }); - }); - } - - /** - * 利用边缘投影精确裁剪边界框 - */ - private async refineBox(box: BoundingBox, edgeImage: sharp.Sharp): Promise { - try { - const { data, info } = await edgeImage - .clone() - .extract({ left: box.x, top: box.y, width: box.width, height: box.height }) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height } = info; - - const projX = new Array(width).fill(0); - const projY = new Array(height).fill(0); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const pixel = data[y * width + x]; - if (pixel > 0) { - projX[x]++; - projY[y]++; - } - } - } - - const findBounds = (proj: number[], minThreshold: number = 2): { start: number; end: number } => { - let start = -1, end = -1; - - // Find first and last edge pixels - for (let i = 0; i < proj.length; i++) { - if (proj[i] >= minThreshold) { - if (start === -1) start = i; - end = i; - } - } - - if (start === -1) return { start: 0, end: proj.length - 1 }; // No edges found, return original - - // Refine start: move inwards from the first found edge - let bestStart = start; - for (let i = start; i < Math.min(proj.length, start + 10); i++) { - if (proj[i] >= minThreshold) { - bestStart = i; - break; - } - } - - // Refine end: move inwards from the last found edge - let bestEnd = end; - for (let i = end; i >= Math.max(0, end - 10); i--) { - if (proj[i] >= minThreshold) { - bestEnd = i; - break; - } - } - - return { start: bestStart, end: bestEnd }; - }; - - const { start: xStart, end: xEnd } = findBounds(projX); - const { start: yStart, end: yEnd } = findBounds(projY); - - const newX = box.x + xStart; - const newY = box.y + yStart; - const newWidth = xEnd - xStart + 1; - const newHeight = yEnd - yStart + 1; - - // Basic sanity check for refinement - if (newWidth <= 10 || newHeight <= 10 || newWidth > box.width * 1.2 || newHeight > box.height * 1.2) { - return box; // 
Return original if refinement is unreasonable - } - - return { - x: newX, - y: newY, - width: newWidth, - height: newHeight, - score: box.score, - }; - } catch (e) { - console.error(` [RefineBox] Error refining box, returning original.`, e); - return box; - } - } - - /** - * 初始候选查找:融合亮度、边缘检测 - */ - private async findInitialCandidates(imagePath: string): Promise { - const { data, info } = await sharp(imagePath) .raw() .toBuffer({ resolveWithObject: true }); - const { width, height, channels } = info; - - // 策略A:白色+暗色混合检测 - const mixedBoxes = this.detectDarkRegions(data, width, height, channels, true); - - // 策略B:简化的边缘检测 - const edgeBoxes = this.detectByEdges(data, width, height, channels); - - // 合并结果 - let allBoxes = [...mixedBoxes]; - for (const eb of edgeBoxes) { - if (!allBoxes.some(mb => this.calculateIoU(mb, eb) > 0.5)) { - allBoxes.push(eb); - } - } - - // 过滤和排序 - allBoxes = allBoxes.filter(candidate => { - const aspectRatio = candidate.width / candidate.height; - const sizeDiff = Math.abs(candidate.width - candidate.height); - return aspectRatio >= 0.85 && aspectRatio <= 1.18 && sizeDiff <= 20; - }); - - allBoxes.sort((a, b) => { - const scoreA = (1 - Math.abs(a.width / a.height - 1)) + a.score / (a.width * a.height); - const scoreB = (1 - Math.abs(b.width / b.height - 1)) + b.score / (b.width * b.height); - return scoreB - scoreA; - }); - - // 去重 - const uniqueBoxes: BoundingBox[] = []; - for (const box of allBoxes) { - if (!uniqueBoxes.some(ub => this.calculateIoU(ub, box) > 0.5)) { - uniqueBoxes.push(box); - } + const { width, height } = info; + if (!width || !height) { + throw new Error('Cannot compute edges without image dimensions'); } - return uniqueBoxes; - } - - /** - * 简化的边缘检测策略(基于Sobel算子) - */ - private detectByEdges(data: Buffer, width: number, height: number, channels: number): BoundingBox[] { - // 1. 
转灰度 - const gray = new Uint8Array(width * height); - for (let i = 0; i < width * height; i++) { - const idx = i * channels; - gray[i] = Math.round(data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114); - } - - // 2. Sobel边缘检测 - const edges = new Uint8Array(width * height); + const sobelData = Buffer.alloc(width * height); for (let y = 1; y < height - 1; y++) { for (let x = 1; x < width - 1; x++) { - const gx = -gray[(y-1)*width + x-1] + gray[(y-1)*width + x+1] - -2*gray[y*width + x-1] + 2*gray[y*width + x+1] - -gray[(y+1)*width + x-1] + gray[(y+1)*width + x+1]; - - const gy = -gray[(y-1)*width + x-1] - 2*gray[(y-1)*width + x] - gray[(y-1)*width + x+1] - +gray[(y+1)*width + x-1] + 2*gray[(y+1)*width + x] + gray[(y+1)*width + x+1]; - - const magnitude = Math.sqrt(gx*gx + gy*gy); - edges[y*width + x] = magnitude > 40 ? 1 : 0; + const idx = y * width + x; + const gx = + -data[(y - 1) * width + (x - 1)] - + 2 * data[y * width + (x - 1)] - + data[(y + 1) * width + (x - 1)] + + data[(y - 1) * width + (x + 1)] + + 2 * data[y * width + (x + 1)] + + data[(y + 1) * width + (x + 1)]; + const gy = + -data[(y - 1) * width + (x - 1)] - + 2 * data[(y - 1) * width + x] - + data[(y - 1) * width + (x + 1)] + + data[(y + 1) * width + (x - 1)] + + 2 * data[(y + 1) * width + x] + + data[(y + 1) * width + (x + 1)]; + const magnitude = Math.sqrt(gx * gx + gy * gy); + sobelData[idx] = magnitude > 50 ? 255 : 0; } } - // 3. 膨胀连接边缘 - const dilated = this.dilate(edges, width, height, 4); - - // 4. 查找连通区域 - const regions = this.findDarkRegionsList(dilated, width, height); - - // 5. 
筛选候选 - return this.selectBestRegions(regions, width, height, true); + return sharp(sobelData, { + raw: { width, height, channels: 1 }, + }); } - /** - * 验证候选区域的色调一致性(真实滑块的特征) - */ - private verifyWhiteBorder( - data: Buffer, - width: number, - height: number, - channels: number, - box: BoundingBox - ): boolean { - // 采样区域内的像素,计算色调方差 - const samples: Array<{h: number, s: number, v: number}> = []; + private async refineBox( + box: BoundingBox, + edgeImage: sharp.Sharp + ): Promise { + try { + const { data, info } = await edgeImage + .clone() + .extract({ + left: box.x, + top: box.y, + width: box.width, + height: box.height, + }) + .raw() + .toBuffer({ resolveWithObject: true }); - // 每隔4个像素采样一次,避免计算量过大 - for (let y = box.y; y < box.y + box.height; y += 4) { - for (let x = box.x; x < box.x + box.width; x += 4) { - if (x >= width || y >= height) continue; - - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - // 转换为HSV色彩空间 - const hsv = this.rgbToHsv(r, g, b); - samples.push(hsv); - } - } + const { width, height } = info; - if (samples.length < 10) return true; // 样本太少,无法判断 - - // 计算色调(H)的标准差 - const hues = samples.map(s => s.h); - const avgHue = hues.reduce((a, b) => a + b, 0) / hues.length; - - // 处理色调的循环性(0-360度) - let sumSquaredDiff = 0; - for (const h of hues) { - let diff = Math.abs(h - avgHue); - // 处理色调循环(例如 350° 和 10° 实际很接近) - if (diff > 180) diff = 360 - diff; - sumSquaredDiff += diff * diff; - } - - const hueStdDev = Math.sqrt(sumSquaredDiff / hues.length); - - // 计算饱和度(S)的标准差 - const saturations = samples.map(s => s.s); - const avgSat = saturations.reduce((a, b) => a + b, 0) / saturations.length; - const satStdDev = Math.sqrt( - saturations.reduce((sum, s) => sum + Math.pow(s - avgSat, 2), 0) / saturations.length - ); - - // 如果色调标准差小于60度,且饱和度标准差小于0.30,认为色调一致 - // 这表示区域内颜色比较统一,是真实滑块的可能性较大 - console.log(` 色调一致性 [x=${box.x}, y=${box.y}]: hueStdDev=${hueStdDev.toFixed(1)}°, 
satStdDev=${satStdDev.toFixed(3)}, avgSat=${avgSat.toFixed(3)}`); - return hueStdDev < 60 && satStdDev < 0.30; - } - - /** - * RGB转HSV色彩空间 - */ - private rgbToHsv(r: number, g: number, b: number): {h: number, s: number, v: number} { - r = r / 255; - g = g / 255; - b = b / 255; - - const max = Math.max(r, g, b); - const min = Math.min(r, g, b); - const delta = max - min; - - let h = 0; - let s = max === 0 ? 0 : delta / max; - let v = max; - - if (delta !== 0) { - if (max === r) { - h = 60 * (((g - b) / delta) % 6); - } else if (max === g) { - h = 60 * ((b - r) / delta + 2); - } else { - h = 60 * ((r - g) / delta + 4); - } - } - - if (h < 0) h += 360; - - return { h, s, v }; - } - - /** - * 检测暗色区域(滑块缺口) - 支持检测多个 - */ - private detectDarkRegions( - data: Buffer, - width: number, - height: number, - channels: number, - detectMultiple: boolean = false - ): BoundingBox[] { - // 混合策略:同时检测白色边缘和暗色区域 - const allCandidates: BoundingBox[] = []; - - // 策略1:检测白色边缘(滑块轮廓)- 增加更多阈值 - for (const brightThreshold of [130, 150, 170, 190, 210]) { - const whiteMap = new Uint8Array(width * height); + const projX = new Array(width).fill(0); + const projY = new Array(height).fill(0); for (let y = 0; y < height; y++) { for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = (r * 0.299 + g * 0.587 + b * 0.114); - - const isWhite = brightness > brightThreshold && - Math.abs(r - g) < 60 && - Math.abs(g - b) < 60; - - whiteMap[y * width + x] = isWhite ? 
1 : 0; - } - } - - const dilated = this.dilate(whiteMap, width, height, 5); - const regions = this.findDarkRegionsList(dilated, width, height); - const candidates = this.selectBestRegions(regions, width, height, true); - - for (const c of candidates) { - if (!allCandidates.some(e => this.calculateIoU(e, c) > 0.5)) { - allCandidates.push(c); - } - } - } - - // 策略2:检测暗色区域(滑块内部)- 增加更多阈值 - for (const darkThreshold of [60, 80, 100, 120, 140]) { - const darkMap = new Uint8Array(width * height); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = (r * 0.299 + g * 0.587 + b * 0.114); - - darkMap[y * width + x] = brightness < darkThreshold ? 1 : 0; - } - } - - const cleaned = this.morphologyClose(darkMap, width, height, 2); - const regions = this.findDarkRegionsList(cleaned, width, height); - const candidates = this.selectBestRegions(regions, width, height, true); - - for (const c of candidates) { - if (!allCandidates.some(e => this.calculateIoU(e, c) > 0.5)) { - allCandidates.push(c); - } - } - } - - if (allCandidates.length === 0) return []; - - // 排序并选择最佳2个 - allCandidates.sort((a, b) => { - const scoreA = Math.abs(a.width / a.height - 1) * 5 + Math.abs(a.width - 88) / 30; - const scoreB = Math.abs(b.width / b.height - 1) * 5 + Math.abs(b.width - 88) / 30; - return scoreA - scoreB; - }); - - const selected: BoundingBox[] = []; - for (const candidate of allCandidates) { - const overlaps = selected.some(s => this.calculateIoU(s, candidate) > 0.2); // 降低IoU阈值 - if (!overlaps) { - selected.push(candidate); - if (selected.length >= 3) break; // 增加到3个候选 - } - } - - // 如果检测到的数量不够,尝试放宽条件再找一次 - if (selected.length < 2) { - const relaxed = allCandidates.filter(c => - !selected.some(s => this.calculateIoU(s, c) > 0.1) && - c.width >= 65 && c.width <= 115 && - c.height >= 65 && c.height <= 115 - ); - - for (const candidate 
of relaxed) { - if (selected.length >= 2) break; - selected.push(candidate); - } - } - - return detectMultiple ? selected.slice(0, 2) : (selected.length > 0 ? [selected[0]] : []); - } - - /** - * 查找暗色连通区域 - 返回列表 - */ - private findDarkRegionsList( - binary: Uint8Array, - width: number, - height: number - ): BoundingBox[] { - const visited = new Uint8Array(width * height); - const regions: BoundingBox[] = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - - if (visited[idx] === 0 && binary[idx] === 1) { - const region = this.floodFill(binary, visited, x, y, width, height); - - if (region.width >= 20 && region.height >= 20) { - regions.push(region); + const pixel = data[y * width + x]; + if (pixel > 0) { + projX[x]++; + projY[y]++; } } } - } - return regions; - } + const findBounds = (proj: number[], minThreshold = 2) => { + let start = -1; + let end = -1; - /** - * 选择最佳候选区域 - 支持返回多个 - */ - private selectBestRegions( - regions: BoundingBox[], - imageWidth: number, - imageHeight: number, - selectMultiple: boolean = false - ): BoundingBox[] { - if (regions.length === 0) return []; - - // 过滤掉太大的区域 - const validRegions = regions.filter(r => - r.width < imageWidth * 0.5 && r.height < imageHeight * 0.5 - ); - - // 豆瓣滑块缺口特征: - // 1. 宽度 50-100 像素 - // 2. 高度 50-170 像素(放宽以适应不同形状) - // 3. 宽高比 0.5-1.8(允许一定变形) - // 4. 高像素密度(填充率) - - const candidates = validRegions.filter(region => { - const aspectRatio = region.width / region.height; - const centerY = region.y + region.height / 2; - - // 滑块特征(更严格的正方形要求): - // 1. 大小 70-110px (正方形拼图块) - // 2. 宽高比 0.85-1.18 (严格的正方形,只允许小幅度变形) - // 3. 位置在图片的合理范围内 - // 4. 
宽度和高度差异不超过20px - const sizeDiff = Math.abs(region.width - region.height); - - return ( - region.width >= 70 && region.width <= 110 && - region.height >= 70 && region.height <= 110 && - aspectRatio >= 0.85 && aspectRatio <= 1.18 && - sizeDiff <= 20 && // 宽高差不超过20px,确保是正方形 - centerY > imageHeight * 0.10 && - centerY < imageHeight * 0.80 - ); - }); - - if (candidates.length === 0) return []; - - // 按照质量排序:优先选择接近正方形且密度高的 - candidates.sort((a, b) => { - const densityA = a.score / (a.width * a.height); - const densityB = b.score / (b.width * b.height); - const aspectScoreA = Math.abs(a.width / a.height - 1); - const aspectScoreB = Math.abs(b.width / b.height - 1); - - // 密度优先,然后是形状 - return (densityB * 3 - aspectScoreB) - (densityA * 3 - aspectScoreA); - }); - - // 返回前N个不重叠的候选 - const selected: BoundingBox[] = []; - for (const candidate of candidates) { - // 检查是否与已选择的重叠 - const overlaps = selected.some(s => { - const iou = this.calculateIoU(s, candidate); - return iou > 0.3; // IoU > 0.3 认为重叠 - }); - - if (!overlaps) { - selected.push(candidate); - if (!selectMultiple && selected.length >= 1) break; - if (selectMultiple && selected.length >= 3) break; // 最多返回3个 - } - } - - return selected; - } - - /** - * 计算IoU - 移到这里以便selectBestRegions使用 - */ - private calculateIoU(box1: BoundingBox, box2: BoundingBox): number { - const x1 = Math.max(box1.x, box2.x); - const y1 = Math.max(box1.y, box2.y); - const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); - const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); - - const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); - const box1Area = box1.width * box1.height; - const box2Area = box2.width * box2.height; - const unionArea = box1Area + box2Area - intersectionArea; - - return unionArea > 0 ? 
intersectionArea / unionArea : 0; - } - - /** - * 形态学闭运算 - */ - private morphologyClose( - binary: Uint8Array, - width: number, - height: number, - kernelSize: number - ): Uint8Array { - const dilated = this.dilate(binary, width, height, kernelSize); - return this.erode(dilated, width, height, kernelSize); - } - - /** - * 膨胀操作 - */ - private dilate( - binary: Uint8Array, - width: number, - height: number, - kernelSize: number - ): Uint8Array { - const result = new Uint8Array(width * height); - const offset = Math.floor(kernelSize / 2); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - let maxVal = 0; - - for (let ky = -offset; ky <= offset; ky++) { - for (let kx = -offset; kx <= offset; kx++) { - const ny = y + ky; - const nx = x + kx; - - if (nx >= 0 && nx < width && ny >= 0 && ny < height) { - maxVal = Math.max(maxVal, binary[ny * width + nx]); - } + for (let i = 0; i < proj.length; i++) { + if (proj[i] >= minThreshold) { + if (start === -1) start = i; + end = i; } } - - result[y * width + x] = maxVal; - } - } - return result; - } + if (start === -1) { + return { start: 0, end: proj.length - 1 }; + } - /** - * 腐蚀操作 - */ - private erode( - binary: Uint8Array, - width: number, - height: number, - kernelSize: number - ): Uint8Array { - const result = new Uint8Array(width * height); - const offset = Math.floor(kernelSize / 2); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - let minVal = 1; - - for (let ky = -offset; ky <= offset; ky++) { - for (let kx = -offset; kx <= offset; kx++) { - const ny = y + ky; - const nx = x + kx; - - if (nx >= 0 && nx < width && ny >= 0 && ny < height) { - minVal = Math.min(minVal, binary[ny * width + nx]); - } + let bestStart = start; + for (let i = start; i < Math.min(proj.length, start + 10); i++) { + if (proj[i] >= minThreshold) { + bestStart = i; + break; } } - - result[y * width + x] = minVal; - } - } - return result; - } - - /** - * 查找暗色连通区域 - */ - private findDarkRegions( - 
binary: Uint8Array, - width: number, - height: number - ): BoundingBox[] { - const visited = new Uint8Array(width * height); - const regions: BoundingBox[] = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - - if (visited[idx] === 0 && binary[idx] === 1) { - const region = this.floodFill(binary, visited, x, y, width, height); - - if (region.width >= 20 && region.height >= 20) { - regions.push(region); + let bestEnd = end; + for (let i = end; i >= Math.max(0, end - 10); i--) { + if (proj[i] >= minThreshold) { + bestEnd = i; + break; } } + + return { start: bestStart, end: bestEnd }; + }; + + const { start: xStart, end: xEnd } = findBounds(projX); + const { start: yStart, end: yEnd } = findBounds(projY); + + const newX = box.x + xStart; + const newY = box.y + yStart; + const newWidth = xEnd - xStart + 1; + const newHeight = yEnd - yStart + 1; + + if ( + newWidth <= 10 || + newHeight <= 10 || + newWidth > box.width * 1.2 || + newHeight > box.height * 1.2 + ) { + return box; } - } - return regions; + return { + x: newX, + y: newY, + width: newWidth, + height: newHeight, + score: box.score, + }; + } catch (error) { + console.error('[RefineBox] Failed to refine candidate, returning original box.', error); + return box; + } } - /** - * 洪水填充算法 - */ - private floodFill( - binary: Uint8Array, - visited: Uint8Array, - startX: number, - startY: number, - width: number, - height: number - ): BoundingBox { - let minX = startX; - let minY = startY; - let maxX = startX; - let maxY = startY; - let pixelCount = 0; - - const stack: Array<[number, number]> = [[startX, startY]]; - - while (stack.length > 0) { - const [x, y] = stack.pop()!; - - if (x < 0 || x >= width || y < 0 || y >= height) continue; - - const idx = y * width + x; - if (visited[idx] === 1) continue; - if (binary[idx] !== 1) continue; - - visited[idx] = 1; - pixelCount++; - - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - 
maxY = Math.max(maxY, y); - - stack.push([x + 1, y]); - stack.push([x - 1, y]); - stack.push([x, y + 1]); - stack.push([x, y - 1]); - } - - return { - x: minX, - y: minY, - width: maxX - minX + 1, - height: maxY - minY + 1, - score: pixelCount - }; - } - - /** - * 选择最佳候选区域 - */ - private selectBestRegion( - regions: BoundingBox[], - imageWidth: number, - imageHeight: number - ): BoundingBox | null { - if (regions.length === 0) return null; - - // 过滤掉整个图片大小的区域 - const validRegions = regions.filter(r => - r.width < imageWidth * 0.9 && r.height < imageHeight * 0.9 - ); - - if (validRegions.length === 0) return null; - - // 豆瓣滑块缺口特征: - // 1. 宽度 50-95 像素(放宽上限) - // 2. 高度 50-95 像素 - // 3. 宽高比接近 1(正方形缺口) - // 4. 高像素密度(填充率 > 0.60)- 降低阈值以适应对比度低的图片 - - const candidates = validRegions.filter(region => { - const aspectRatio = region.width / region.height; - const density = region.score / (region.width * region.height); - const centerY = region.y + region.height / 2; - - return ( - region.width >= 50 && region.width <= 95 && - region.height >= 50 && region.height <= 95 && - aspectRatio >= 0.85 && aspectRatio <= 1.18 && - centerY > imageHeight * 0.12 && - centerY < imageHeight * 0.78 && - density > 0.60 // 降低密度阈值 - ); - }); - - if (candidates.length > 0) { - // 优先选择密度最高且最接近正方形的 - candidates.sort((a, b) => { - const densityA = a.score / (a.width * a.height); - const densityB = b.score / (b.width * b.height); - const aspectScoreA = Math.abs(a.width / a.height - 1); - const aspectScoreB = Math.abs(b.width / b.height - 1); - - // 密度权重更高 - return (densityB * 2 + (1 - aspectScoreB)) - (densityA * 2 + (1 - aspectScoreA)); - }); - - return candidates[0]; - } - - // 放宽条件再试一次 - const relaxedCandidates = validRegions.filter(region => { - const aspectRatio = region.width / region.height; - const density = region.score / (region.width * region.height); - - return ( - region.width >= 45 && region.width <= 100 && - region.height >= 45 && region.height <= 100 && - aspectRatio >= 0.75 && 
aspectRatio <= 1.33 && - region.y < imageHeight * 0.82 && - region.y > imageHeight * 0.06 && - density > 0.45 // 进一步降低 - ); - }); - - if (relaxedCandidates.length === 0) return null; - - // 选择密度最高的 - relaxedCandidates.sort((a, b) => { - const densityA = a.score / (a.width * a.height); - const densityB = b.score / (b.width * b.height); - return densityB - densityA; - }); - - return relaxedCandidates[0]; - } - - /** - * 在图像上绘制边界框(支持多个) - */ private async drawBoundingBoxes( imagePath: string, boxes: BoundingBox[], outputPath: string, - color: 'red' | 'blue' | 'green' = 'blue' + color: BoxColor = 'blue' ): Promise { - const colorMap = { + if (boxes.length === 0) { + return; + } + + const colorMap: Record = { red: { r: 255, g: 0, b: 0 }, blue: { r: 0, g: 0, b: 255 }, - green: { r: 0, g: 255, b: 0 } + green: { r: 0, g: 255, b: 0 }, }; const rgb = colorMap[color]; - const lineWidth = 3; const image = sharp(imagePath); const metadata = await image.metadata(); if (!metadata.width || !metadata.height) { - throw new Error('Cannot get image dimensions'); + throw new Error('Cannot draw bounding boxes without image dimensions'); } - // 创建SVG覆盖层绘制所有矩形框 - const rects = boxes.map(box => ` - ` + `).join('\n'); + stroke-width="2" + />` + ) + .join('\n'); - const svg = ` - - ${rects} - - `; + const svg = Buffer.from( + ` + ${rectangles} + ` + ); await image - .composite([{ - input: Buffer.from(svg), - top: 0, - left: 0 - }]) + .composite([{ input: svg, top: 0, left: 0 }]) .toFile(outputPath); } - - /** - * 验证检测结果 - */ - async validateDetection( - imagePath: string, - targetImagePath: string - ): Promise<{ match: boolean; iou: number }> { - const targetBox = await this.extractRedBox(targetImagePath); - if (!targetBox) { - console.log(`No red box found in ${targetImagePath}`); - return { match: false, iou: 0 }; - } - - const detected = await this.detectSlider(imagePath, undefined, true); // 检测多个 - if (!detected) { - console.log(`No slider detected in ${imagePath}`); - return { match: 
false, iou: 0 }; - } - - const detectedBoxes = Array.isArray(detected) ? detected : [detected]; - - // 检查是否有任何一个检测框在红框内 - let bestIoU = 0; - let anyInside = false; - - for (const detectedBox of detectedBoxes) { - const isInside = this.isBoxInside(detectedBox, targetBox); - const iou = this.calculateIoU(detectedBox, targetBox); - - if (isInside) anyInside = true; - bestIoU = Math.max(bestIoU, iou); - } - - // 如果任何缺口在红框内,认为检测成功 - const match = anyInside; - - return { match, iou: bestIoU }; - } - - /** - * 检查box1是否在box2内部(或大部分重叠) - */ - private isBoxInside(box1: BoundingBox, box2: BoundingBox): boolean { - const centerX = box1.x + box1.width / 2; - const centerY = box1.y + box1.height / 2; - - // 检查中心点是否在box2内 - return ( - centerX >= box2.x && - centerX <= box2.x + box2.width && - centerY >= box2.y && - centerY <= box2.y + box2.height - ); - } - - /** - * 从标注图像中提取红色框 - */ - private async extractRedBox(imagePath: string): Promise { - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - let minX = width; - let minY = height; - let maxX = 0; - let maxY = 0; - let foundRed = false; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - if (r > 200 && g < 100 && b < 100) { - foundRed = true; - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - } - } - } - - if (!foundRed) return null; - - return { - x: minX, - y: minY, - width: maxX - minX + 1, - height: maxY - minY + 1, - score: 1.0 - }; - } } -async function main() { - const detector = new SliderDetector(); - const validator = new SliderValidator(); - const baseDir = path.join(__dirname, '..'); - - console.log('=== 开始滑块检测 ===\n'); - - // 1. 验证算法准确性(使用新的验证器) - console.log('1. 
验证算法准确性(容差:10px)...\n'); - const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - - if (fs.existsSync(doubanTargetDir)) { - const targetFiles = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); - let totalMatched = 0; - let totalTargets = 0; - let totalDetected = 0; - - for (const file of targetFiles) { - const targetPath = path.join(doubanTargetDir, file); - const imagePath = path.join(doubanDir, file); - - if (!fs.existsSync(imagePath)) { - console.log(` 跳过 ${file} (原图不存在)`); - continue; - } - - // 提取标准答案(红框) - const targetBoxes = await validator.extractRedBoxes(targetPath); - - // 检测滑块(检测所有可能的滑块) - const detected = await detector.detectSlider(imagePath, undefined, true); - const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : []; - - // 转换为验证器的格式 - const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({ - x: b.x, - y: b.y, - width: b.width, - height: b.height - })); - - // 验证 - const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10); - - console.log(` ${file}:`); - console.log(` 目标: ${result.totalTargets}个, 检测: ${result.detectedCount}个, 匹配: ${result.matchedCount}个`); - console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%, 召回率: ${(result.recall * 100).toFixed(1)}%`); - - if (result.matches.length > 0) { - result.matches.forEach((m, i) => { - console.log(` 匹配${i + 1}: IoU=${m.iou.toFixed(3)}`); - }); - } - - if (result.matchedCount < result.totalTargets) { - console.log(` ⚠️ 漏检: ${result.totalTargets - result.matchedCount}个滑块`); - } - - if (result.unmatched.length > 0) { - console.log(` ⚠️ 误检: ${result.unmatched.length}个`); - } - - console.log(''); - - totalMatched += result.matchedCount; - totalTargets += result.totalTargets; - totalDetected += result.detectedCount; - } - - const overallPrecision = totalDetected > 0 ? 
(totalMatched / totalDetected * 100).toFixed(1) : '0.0'; - const overallRecall = totalTargets > 0 ? (totalMatched / totalTargets * 100).toFixed(1) : '0.0'; - - console.log(`总体统计:`); - console.log(` 总目标数: ${totalTargets}个`); - console.log(` 总检测数: ${totalDetected}个`); - console.log(` 成功匹配: ${totalMatched}个`); - console.log(` 准确率(Precision): ${overallPrecision}%`); - console.log(` 召回率(Recall): ${overallRecall}%\n`); - } - - // 2. 处理所有豆瓣图片并输出结果 - console.log('2. 处理豆瓣滑块图片...'); - const outputDir = path.join(baseDir, 'images', 'output'); - - if (!fs.existsSync(outputDir)) { - fs.mkdirSync(outputDir, { recursive: true }); - } - - if (fs.existsSync(doubanDir)) { - const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')); - let processedCount = 0; - - for (const file of files) { - const inputPath = path.join(doubanDir, file); - const outputPath = path.join(outputDir, file); - - const result = await detector.detectSlider(inputPath, outputPath, true); - - if (result && result.length > 0) { - console.log(` ✅ Processed ${file}, found ${result.length} sliders.`); - processedCount++; - } else { - console.log(` ❌ Processed ${file}, but no slider was found.`); - } - } - - if (files.length > 0) { - console.log(`\n Processed ${processedCount} out of ${files.length} images.`); - } +function toRawImage(data: Buffer, info: sharp.OutputInfo): RawImage { + const { width, height, channels } = info; + if (!width || !height || !channels) { + throw new Error('Failed to read image metadata.'); } + return { data, width, height, channels }; } - -main(); diff --git a/src/edge-detector.ts b/src/edge-detector.ts deleted file mode 100644 index bd40d29..0000000 --- a/src/edge-detector.ts +++ /dev/null @@ -1,392 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -interface BoundingBox { - x: number; - y: number; - width: number; - height: number; - score: number; -} - -export class EdgeDetector { - /** - * 检测滑块 - 基于边缘检测 - */ - async 
detectSlider( - imagePath: string, - outputPath?: string, - detectMultiple: boolean = false - ): Promise { - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 1. 转换为灰度并计算梯度(边缘强度) - const edges = this.detectEdges(data, width, height, channels); - - // 2. 应用阈值获取强边缘 - const binary = this.thresholdEdges(edges, width, height); - - // 3. 形态学操作连接边缘 - const connected = this.morphologyClose(binary, width, height, 3); - - // 4. 查找连通区域 - const regions = this.findEdgeRegionsList(connected, width, height); - - // 5. 筛选候选 - const candidates = this.selectBestRegions(regions, width, height, true); - - if (candidates.length === 0) { - return detectMultiple ? [] : null; - } - - // 6. 如果需要输出可视化 - if (outputPath) { - await this.drawBoxes(imagePath, candidates, outputPath); - } - - return detectMultiple ? candidates : candidates[0]; - } - - /** - * Sobel边缘检测 - */ - private detectEdges( - data: Buffer, - width: number, - height: number, - channels: number - ): Float32Array { - const edges = new Float32Array(width * height); - - // Sobel算子 - const sobelX = [ - [-1, 0, 1], - [-2, 0, 2], - [-1, 0, 1] - ]; - - const sobelY = [ - [-1, -2, -1], - [0, 0, 0], - [1, 2, 1] - ]; - - for (let y = 1; y < height - 1; y++) { - for (let x = 1; x < width - 1; x++) { - let gx = 0; - let gy = 0; - - // 计算Sobel梯度 - for (let ky = -1; ky <= 1; ky++) { - for (let kx = -1; kx <= 1; kx++) { - const idx = ((y + ky) * width + (x + kx)) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - const brightness = r * 0.299 + g * 0.587 + b * 0.114; - - gx += brightness * sobelX[ky + 1][kx + 1]; - gy += brightness * sobelY[ky + 1][kx + 1]; - } - } - - const magnitude = Math.sqrt(gx * gx + gy * gy); - edges[y * width + x] = magnitude; - } - } - - return edges; - } - - /** - * 边缘二值化 - */ - private thresholdEdges( - edges: Float32Array, - width: number, - height: number - ): Uint8Array { - 
// 计算边缘强度的统计信息 - let max = 0; - for (let i = 0; i < edges.length; i++) { - max = Math.max(max, edges[i]); - } - - // 使用自适应阈值(最大值的20%) - const threshold = max * 0.15; - - const binary = new Uint8Array(width * height); - for (let i = 0; i < edges.length; i++) { - binary[i] = edges[i] > threshold ? 1 : 0; - } - - return binary; - } - - /** - * 形态学闭运算 - */ - private morphologyClose( - binary: Uint8Array, - width: number, - height: number, - kernelSize: number - ): Uint8Array { - const dilated = this.dilate(binary, width, height, kernelSize); - return this.erode(dilated, width, height, kernelSize); - } - - /** - * 膨胀操作 - */ - private dilate( - binary: Uint8Array, - width: number, - height: number, - kernelSize: number - ): Uint8Array { - const result = new Uint8Array(width * height); - const offset = Math.floor(kernelSize / 2); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - let maxVal = 0; - - for (let ky = -offset; ky <= offset; ky++) { - for (let kx = -offset; kx <= offset; kx++) { - const ny = y + ky; - const nx = x + kx; - - if (nx >= 0 && nx < width && ny >= 0 && ny < height) { - maxVal = Math.max(maxVal, binary[ny * width + nx]); - } - } - } - - result[y * width + x] = maxVal; - } - } - - return result; - } - - /** - * 腐蚀操作 - */ - private erode( - binary: Uint8Array, - width: number, - height: number, - kernelSize: number - ): Uint8Array { - const result = new Uint8Array(width * height); - const offset = Math.floor(kernelSize / 2); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - let minVal = 1; - - for (let ky = -offset; ky <= offset; ky++) { - for (let kx = -offset; kx <= offset; kx++) { - const ny = y + ky; - const nx = x + kx; - - if (nx >= 0 && nx < width && ny >= 0 && ny < height) { - minVal = Math.min(minVal, binary[ny * width + nx]); - } - } - } - - result[y * width + x] = minVal; - } - } - - return result; - } - - /** - * 查找边缘连通区域 - */ - private findEdgeRegionsList( - binary: Uint8Array, - width: 
number, - height: number - ): BoundingBox[] { - const visited = new Uint8Array(width * height); - const regions: BoundingBox[] = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - - if (visited[idx] === 0 && binary[idx] === 1) { - const region = this.floodFill(binary, visited, x, y, width, height); - - if (region.width >= 30 && region.height >= 30) { - regions.push(region); - } - } - } - } - - return regions; - } - - /** - * 泛洪填充 - */ - private floodFill( - binary: Uint8Array, - visited: Uint8Array, - startX: number, - startY: number, - width: number, - height: number - ): BoundingBox { - const stack: Array<[number, number]> = [[startX, startY]]; - let pixelCount = 0; - let minX = width; - let minY = height; - let maxX = 0; - let maxY = 0; - - while (stack.length > 0) { - const [x, y] = stack.pop()!; - - if (x < 0 || x >= width || y < 0 || y >= height) continue; - - const idx = y * width + x; - if (visited[idx] === 1) continue; - if (binary[idx] !== 1) continue; - - visited[idx] = 1; - pixelCount++; - - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - - stack.push([x + 1, y]); - stack.push([x - 1, y]); - stack.push([x, y + 1]); - stack.push([x, y - 1]); - } - - return { - x: minX, - y: minY, - width: maxX - minX + 1, - height: maxY - minY + 1, - score: pixelCount - }; - } - - /** - * 选择最佳候选区域 - */ - private selectBestRegions( - regions: BoundingBox[], - imageWidth: number, - imageHeight: number, - selectMultiple: boolean = false - ): BoundingBox[] { - if (regions.length === 0) return []; - - // 滑块特征(基于边缘): - // 1. 大小 70-110px (放宽范围) - // 2. 宽高比 0.7-1.4 (接近正方形) - // 3. 位置在图片的合理范围内 - // 4. 
边缘密度适中(不会太sparse) - - const candidates = regions.filter(region => { - const aspectRatio = region.width / region.height; - const centerY = region.y + region.height / 2; - const edgeDensity = region.score / (region.width * region.height); - - return ( - region.width >= 70 && region.width <= 110 && - region.height >= 70 && region.height <= 110 && - aspectRatio >= 0.7 && aspectRatio <= 1.4 && - centerY > imageHeight * 0.15 && - centerY < imageHeight * 0.75 && - edgeDensity > 0.08 && edgeDensity < 0.45 // 边缘密度:不太稀疏,也不太密集 - ); - }); - - if (candidates.length === 0) return []; - - // 按质量排序 - candidates.sort((a, b) => { - const scoreA = Math.abs(a.width / a.height - 1) + Math.abs(a.width - 90) / 100; - const scoreB = Math.abs(b.width / b.height - 1) + Math.abs(b.width - 90) / 100; - return scoreA - scoreB; - }); - - // 返回不重叠的候选 - const selected: BoundingBox[] = []; - for (const candidate of candidates) { - const overlaps = selected.some(s => { - const iou = this.calculateIoU(s, candidate); - return iou > 0.3; - }); - - if (!overlaps) { - selected.push(candidate); - if (!selectMultiple && selected.length >= 1) break; - if (selectMultiple && selected.length >= 3) break; - } - } - - return selected; - } - - /** - * 计算IoU - */ - private calculateIoU(box1: BoundingBox, box2: BoundingBox): number { - const x1 = Math.max(box1.x, box2.x); - const y1 = Math.max(box1.y, box2.y); - const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); - const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); - - const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); - const box1Area = box1.width * box1.height; - const box2Area = box2.width * box2.height; - const unionArea = box1Area + box2Area - intersectionArea; - - return unionArea > 0 ? 
intersectionArea / unionArea : 0; - } - - /** - * 绘制检测框 - */ - private async drawBoxes( - imagePath: string, - boxes: BoundingBox[], - outputPath: string - ): Promise { - const image = sharp(imagePath); - const metadata = await image.metadata(); - const { width = 0, height = 0 } = metadata; - - const svgBoxes = boxes.map(box => - `` - ).join('\n'); - - const svg = ` - - ${svgBoxes} - - `; - - await image - .composite([{ input: Buffer.from(svg), top: 0, left: 0 }]) - .toFile(outputPath); - } -} diff --git a/src/extract-targets.ts b/src/extract-targets.ts deleted file mode 100644 index 132270e..0000000 --- a/src/extract-targets.ts +++ /dev/null @@ -1,179 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -interface BoundingBox { - x: number; - y: number; - width: number; - height: number; -} - -/** - * 从标注图像中提取所有红框(支持多个) - */ -async function extractAllRedBoxes(imagePath: string): Promise { - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 创建红色像素的二值图 - const redMap = new Uint8Array(width * height); - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - // 检测红色像素(高R值,低G和B值) - redMap[y * width + x] = (r > 200 && g < 100 && b < 100) ? 
1 : 0; - } - } - - // 使用连通区域分析找到所有红框 - const visited = new Uint8Array(width * height); - const boxes: BoundingBox[] = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - - if (visited[idx] === 0 && redMap[idx] === 1) { - // 找到一个新的红色区域 - let minX = x, minY = y, maxX = x, maxY = y; - const stack: Array<[number, number]> = [[x, y]]; - - while (stack.length > 0) { - const [cx, cy] = stack.pop()!; - if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; - - const cidx = cy * width + cx; - if (visited[cidx] === 1 || redMap[cidx] !== 1) continue; - - visited[cidx] = 1; - minX = Math.min(minX, cx); - minY = Math.min(minY, cy); - maxX = Math.max(maxX, cx); - maxY = Math.max(maxY, cy); - - stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); - } - - const boxWidth = maxX - minX + 1; - const boxHeight = maxY - minY + 1; - - // 过滤掉太小的噪点(红框应该足够大) - if (boxWidth > 50 && boxHeight > 30) { - boxes.push({ - x: minX, - y: minY, - width: boxWidth, - height: boxHeight - }); - } - } - } - } - - return boxes; -} - -/** - * 在红框内查找实际的滑块缺口(小的正方形区域) - */ -function findSlidersInRedBox(redBox: BoundingBox): BoundingBox[] { - // 红框通常标注的是一个横向区域,里面包含1-2个滑块缺口 - // 滑块缺口特征:50-90像素的正方形 - - const sliders: BoundingBox[] = []; - const expectedSliderSize = 60; // 预期滑块大小 - - // 如果红框宽度远大于高度,说明是横向区域,可能包含多个滑块 - if (redBox.width > redBox.height * 2) { - // 估算可能有几个滑块 - const possibleCount = Math.round(redBox.width / expectedSliderSize); - - if (possibleCount >= 2) { - // 可能有2个滑块,在红框的左右两侧 - sliders.push({ - x: redBox.x, - y: redBox.y, - width: Math.min(90, redBox.height), - height: redBox.height - }); - - sliders.push({ - x: redBox.x + redBox.width - Math.min(90, redBox.height), - y: redBox.y, - width: Math.min(90, redBox.height), - height: redBox.height - }); - } else { - // 只有1个滑块,使用红框高度作为大小 - sliders.push({ - x: redBox.x, - y: redBox.y, - width: Math.min(90, redBox.height), - height: redBox.height - }); - } - } else { - // 
红框本身就接近正方形,直接使用 - sliders.push(redBox); - } - - return sliders; -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const targetDir = path.join(baseDir, 'images', 'douban-target'); - - if (!fs.existsSync(targetDir)) { - console.error('目录不存在:', targetDir); - return; - } - - const files = fs.readdirSync(targetDir).filter(f => f.endsWith('.png')); - - console.log('=== 提取红框标注信息 ===\n'); - - const groundTruth: Record = {}; - - for (const file of files) { - const imagePath = path.join(targetDir, file); - const redBoxes = await extractAllRedBoxes(imagePath); - - console.log(`${file}:`); - console.log(` 找到 ${redBoxes.length} 个红框标注`); - - const allSliders: BoundingBox[] = []; - - redBoxes.forEach((box, idx) => { - console.log(` 红框${idx + 1}: [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`); - - // 分析红框内的滑块 - const sliders = findSlidersInRedBox(box); - console.log(` -> 推测包含 ${sliders.length} 个滑块`); - - sliders.forEach((slider, sIdx) => { - console.log(` 滑块${sIdx + 1}: [x=${slider.x}, y=${slider.y}, w=${slider.width}, h=${slider.height}]`); - allSliders.push(slider); - }); - }); - - groundTruth[file] = allSliders; - console.log(''); - } - - // 保存标准答案到文件 - const outputPath = path.join(baseDir, 'ground-truth.json'); - fs.writeFileSync(outputPath, JSON.stringify(groundTruth, null, 2)); - console.log(`标准答案已保存到: ${outputPath}\n`); -} - -main().catch(console.error); diff --git a/src/index.ts b/src/index.ts deleted file mode 100644 index da32c4c..0000000 --- a/src/index.ts +++ /dev/null @@ -1,586 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -interface BoundingBox { - x: number; - y: number; - width: number; - height: number; - score: number; -} - -class SliderDetector { - /** - * 使用模板匹配来检测滑块位置 - */ - async detectSlider( - imagePath: string, - outputPath?: string - ): Promise { - try { - // 读取主图像 - const imageBuffer = await sharp(imagePath).ensureAlpha().raw().toBuffer({ resolveWithObject: true 
}); - const { data: imageData, info: imageInfo } = imageBuffer; - - // 使用边缘检测来找滑块 - const box = await this.findSliderByEdgeDetection(imagePath); - - // 如果需要输出结果 - if (outputPath && box) { - await this.drawBoundingBox(imagePath, box, outputPath, 'blue'); - } - - return box; - } catch (error) { - console.error(`Error detecting slider in ${imagePath}:`, error); - return null; - } - } - - /** - * 使用边缘检测和形状分析来找滑块 - */ - private async findSliderByEdgeDetection(imagePath: string): Promise { - // 读取原始图像数据 - const { data: rawData, info: rawInfo } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = rawInfo; - - // 方法1: 检测滑块缺口(暗色区域) - const gapBox = this.detectSliderGap(rawData, width, height, channels); - if (gapBox) { - return gapBox; - } - - // 方法2: 使用边缘检测 - const processed = await sharp(imagePath) - .greyscale() - .normalize() - .toBuffer(); - - // 应用边缘检测(使用锐化滤镜来增强边缘) - const edges = await sharp(processed) - .convolve({ - width: 3, - height: 3, - kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1] - }) - .toBuffer({ resolveWithObject: true }); - - const { data, info } = edges; - const edgeChannels = info.channels; - - // 二值化 - const threshold = 40; - const binary = new Uint8Array(data.length); - for (let i = 0; i < data.length; i += edgeChannels) { - const value = data[i] > threshold ? 
255 : 0; - for (let j = 0; j < edgeChannels; j++) { - binary[i + j] = value; - } - } - - // 查找连通区域 - const regions = this.findConnectedRegions(binary, width, height, edgeChannels); - - // 过滤并找到最可能的滑块区域 - const sliderRegion = this.findSliderRegion(regions, width, height); - - return sliderRegion; - } - - /** - * 检测滑块缺口(豆瓣滑块通常在图片上有一个明显的缺口) - */ - private detectSliderGap( - data: Buffer, - width: number, - height: number, - channels: number - ): BoundingBox | null { - // 创建亮度图 - const brightness = new Float32Array(width * height); - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - brightness[y * width + x] = (r + g + b) / 3; - } - } - - // 计算垂直和水平投影来检测边界 - const verticalProj = new Float32Array(width); - const horizontalProj = new Float32Array(height); - - for (let x = 0; x < width; x++) { - let sum = 0; - for (let y = 0; y < height; y++) { - // 检测亮度变化(边缘) - if (y > 0) { - const diff = Math.abs(brightness[y * width + x] - brightness[(y - 1) * width + x]); - sum += diff; - } - } - verticalProj[x] = sum; - } - - for (let y = 0; y < height; y++) { - let sum = 0; - for (let x = 0; x < width; x++) { - if (x > 0) { - const diff = Math.abs(brightness[y * width + x] - brightness[y * width + (x - 1)]); - sum += diff; - } - } - horizontalProj[y] = sum; - } - - // 寻找投影中的峰值区域(滑块边界) - const sliderCandidates = this.findProjectionPeaks( - verticalProj, - horizontalProj, - width, - height - ); - - if (sliderCandidates.length > 0) { - return sliderCandidates[0]; - } - - return null; - } - - /** - * 从投影数据中找到峰值区域 - */ - private findProjectionPeaks( - verticalProj: Float32Array, - horizontalProj: Float32Array, - width: number, - height: number - ): BoundingBox[] { - const candidates: BoundingBox[] = []; - - // 计算阈值 - const vThreshold = this.calculateThreshold(verticalProj); - const hThreshold = this.calculateThreshold(horizontalProj); - - // 
找垂直方向的峰值区域 - const vRegions: Array<[number, number]> = []; - let inRegion = false; - let start = 0; - - for (let x = 0; x < width; x++) { - if (verticalProj[x] > vThreshold && !inRegion) { - start = x; - inRegion = true; - } else if (verticalProj[x] <= vThreshold && inRegion) { - if (x - start >= 30 && x - start <= 100) { - vRegions.push([start, x]); - } - inRegion = false; - } - } - - // 找水平方向的峰值区域 - const hRegions: Array<[number, number]> = []; - inRegion = false; - start = 0; - - for (let y = 0; y < height; y++) { - if (horizontalProj[y] > hThreshold && !inRegion) { - start = y; - inRegion = true; - } else if (horizontalProj[y] <= hThreshold && inRegion) { - if (y - start >= 30 && y - start <= 100) { - hRegions.push([start, y]); - } - inRegion = false; - } - } - - // 组合垂直和水平区域形成候选框 - for (const [x1, x2] of vRegions) { - for (const [y1, y2] of hRegions) { - const w = x2 - x1; - const h = y2 - y1; - const aspectRatio = w / h; - - // 滑块通常是正方形或接近正方形 - if (aspectRatio >= 0.6 && aspectRatio <= 1.7) { - candidates.push({ - x: x1, - y: y1, - width: w, - height: h, - score: 1.0 - }); - } - } - } - - return candidates; - } - - /** - * 计算自适应阈值 - */ - private calculateThreshold(values: Float32Array): number { - let sum = 0; - for (let i = 0; i < values.length; i++) { - sum += values[i]; - } - const mean = sum / values.length; - - // 使用平均值的1.5倍作为阈值 - return mean * 1.5; - } - - /** - * 查找连通区域 - */ - private findConnectedRegions( - binary: Uint8Array, - width: number, - height: number, - channels: number - ): BoundingBox[] { - const visited = new Uint8Array(width * height); - const regions: BoundingBox[] = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - const pixelIdx = idx * channels; - - if (visited[idx] === 0 && binary[pixelIdx] === 255) { - const region = this.floodFill(binary, visited, x, y, width, height, channels); - if (region.width > 10 && region.height > 10) { // 过滤太小的区域 - regions.push(region); - } - } - } 
- } - - return regions; - } - - /** - * 洪水填充算法查找连通区域 - */ - private floodFill( - binary: Uint8Array, - visited: Uint8Array, - startX: number, - startY: number, - width: number, - height: number, - channels: number - ): BoundingBox { - let minX = startX; - let minY = startY; - let maxX = startX; - let maxY = startY; - - const stack: Array<[number, number]> = [[startX, startY]]; - - while (stack.length > 0) { - const [x, y] = stack.pop()!; - - if (x < 0 || x >= width || y < 0 || y >= height) continue; - - const idx = y * width + x; - if (visited[idx] === 1) continue; - - const pixelIdx = idx * channels; - if (binary[pixelIdx] !== 255) continue; - - visited[idx] = 1; - - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - - stack.push([x + 1, y]); - stack.push([x - 1, y]); - stack.push([x, y + 1]); - stack.push([x, y - 1]); - } - - return { - x: minX, - y: minY, - width: maxX - minX + 1, - height: maxY - minY + 1, - score: 1.0 - }; - } - - /** - * 从所有区域中找到最可能的滑块区域 - */ - private findSliderRegion( - regions: BoundingBox[], - imageWidth: number, - imageHeight: number - ): BoundingBox | null { - if (regions.length === 0) return null; - - // 滑块通常的特征: - // 1. 宽度在30-80像素之间 - // 2. 高度在30-80像素之间 - // 3. 宽高比接近1(正方形) - // 4. 
位于图像上半部分 - - const candidates = regions.filter(region => { - const aspectRatio = region.width / region.height; - return ( - region.width >= 30 && region.width <= 100 && - region.height >= 30 && region.height <= 100 && - aspectRatio >= 0.5 && aspectRatio <= 2.0 && - region.y < imageHeight * 0.7 // 在图像上部70%的区域内 - ); - }); - - if (candidates.length === 0) return null; - - // 选择最方正的区域(宽高比最接近1) - candidates.sort((a, b) => { - const ratioA = Math.abs(a.width / a.height - 1); - const ratioB = Math.abs(b.width / b.height - 1); - return ratioA - ratioB; - }); - - const best = candidates[0]; - - // 扩展边界框以包含完整滑块(增加一些边距) - const padding = 5; - return { - x: Math.max(0, best.x - padding), - y: Math.max(0, best.y - padding), - width: Math.min(imageWidth - best.x + padding, best.width + padding * 2), - height: Math.min(imageHeight - best.y + padding, best.height + padding * 2), - score: best.score - }; - } - - /** - * 在图像上绘制边界框 - */ - private async drawBoundingBox( - imagePath: string, - box: BoundingBox, - outputPath: string, - color: 'red' | 'blue' | 'green' = 'blue' - ): Promise { - const colorMap = { - red: { r: 255, g: 0, b: 0 }, - blue: { r: 0, g: 0, b: 255 }, - green: { r: 0, g: 255, b: 0 } - }; - - const rgb = colorMap[color]; - const lineWidth = 2; - - // 读取原始图像 - const image = sharp(imagePath); - const metadata = await image.metadata(); - - if (!metadata.width || !metadata.height) { - throw new Error('Cannot get image dimensions'); - } - - // 创建SVG覆盖层绘制矩形框 - const svg = ` - - - - `; - - await image - .composite([{ - input: Buffer.from(svg), - top: 0, - left: 0 - }]) - .toFile(outputPath); - } - - /** - * 验证检测结果与人工标注的匹配度 - */ - async validateDetection( - imagePath: string, - targetImagePath: string - ): Promise<{ match: boolean; iou: number }> { - // 从人工标注图像中提取红色框的位置 - const targetBox = await this.extractRedBox(targetImagePath); - if (!targetBox) { - console.log(`No red box found in ${targetImagePath}`); - return { match: false, iou: 0 }; - } - - // 检测滑块位置 - const 
detectedBox = await this.detectSlider(imagePath); - if (!detectedBox) { - console.log(`No slider detected in ${imagePath}`); - return { match: false, iou: 0 }; - } - - // 计算IoU (Intersection over Union) - const iou = this.calculateIoU(detectedBox, targetBox); - const match = iou > 0.5; // IoU > 0.5 认为匹配成功 - - return { match, iou }; - } - - /** - * 从标注图像中提取红色框 - */ - private async extractRedBox(imagePath: string): Promise { - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 查找红色像素 - let minX = width; - let minY = height; - let maxX = 0; - let maxY = 0; - let foundRed = false; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - // 检测红色像素 (高R值,低G和B值) - if (r > 200 && g < 100 && b < 100) { - foundRed = true; - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - } - } - } - - if (!foundRed) return null; - - return { - x: minX, - y: minY, - width: maxX - minX + 1, - height: maxY - minY + 1, - score: 1.0 - }; - } - - /** - * 计算两个边界框的IoU - */ - private calculateIoU(box1: BoundingBox, box2: BoundingBox): number { - const x1 = Math.max(box1.x, box2.x); - const y1 = Math.max(box1.y, box2.y); - const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); - const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); - - const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); - const box1Area = box1.width * box1.height; - const box2Area = box2.width * box2.height; - const unionArea = box1Area + box2Area - intersectionArea; - - return intersectionArea / unionArea; - } -} - -async function main() { - const detector = new SliderDetector(); - const baseDir = path.join(__dirname, '..'); - - console.log('=== 开始滑块检测 ===\n'); - - // 1. 
验证算法准确性(使用douban-target中的标注图片) - console.log('1. 验证算法准确性...'); - const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - - if (fs.existsSync(doubanTargetDir)) { - const targetFiles = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); - let successCount = 0; - let totalIoU = 0; - - for (const file of targetFiles) { - const targetPath = path.join(doubanTargetDir, file); - const imagePath = path.join(doubanDir, file); - - if (!fs.existsSync(imagePath)) { - console.log(` 跳过 ${file} (原图不存在)`); - continue; - } - - const result = await detector.validateDetection(imagePath, targetPath); - console.log(` ${file}: IoU = ${result.iou.toFixed(3)}, 匹配 = ${result.match ? '✓' : '✗'}`); - - if (result.match) successCount++; - totalIoU += result.iou; - } - - const accuracy = targetFiles.length > 0 ? (successCount / targetFiles.length * 100).toFixed(1) : 0; - const avgIoU = targetFiles.length > 0 ? (totalIoU / targetFiles.length).toFixed(3) : 0; - console.log(`\n 准确率: ${successCount}/${targetFiles.length} (${accuracy}%)`); - console.log(` 平均IoU: ${avgIoU}\n`); - } - - // 2. 处理所有豆瓣图片并输出结果 - console.log('2. 
处理豆瓣滑块图片...'); - const outputDir = path.join(baseDir, 'images', 'output'); - - if (!fs.existsSync(outputDir)) { - fs.mkdirSync(outputDir, { recursive: true }); - } - - if (fs.existsSync(doubanDir)) { - const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')); - let processedCount = 0; - - for (const file of files) { - const inputPath = path.join(doubanDir, file); - const outputPath = path.join(outputDir, file); - - const box = await detector.detectSlider(inputPath, outputPath); - - if (box) { - console.log(` ✓ ${file}: 检测到滑块 [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`); - processedCount++; - } else { - console.log(` ✗ ${file}: 未检测到滑块`); - } - } - - console.log(`\n 处理完成: ${processedCount}/${files.length} 张图片`); - console.log(` 输出目录: ${outputDir}\n`); - } - - console.log('=== 检测完成 ==='); -} - -main().catch(console.error); diff --git a/src/test-cv.ts b/src/test-cv.ts deleted file mode 100644 index e69de29..0000000 diff --git a/src/test-edge.ts b/src/test-edge.ts deleted file mode 100644 index c80a386..0000000 --- a/src/test-edge.ts +++ /dev/null @@ -1,78 +0,0 @@ -import * as fs from 'fs'; -import * as path from 'path'; -import { EdgeSliderDetector } from './detector-edge'; -import { SliderValidator, BoundingBox as ValidatorBox } from './validator'; - -async function main() { - const detector = new EdgeSliderDetector(); - const validator = new SliderValidator(); - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); - const outputDir = path.join(baseDir, 'images', 'output-canny'); - - if (!fs.existsSync(outputDir)) { - fs.mkdirSync(outputDir, { recursive: true }); - } - - console.log('=== 测试Canny边缘检测方法 ===\n'); - - const files = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); - let totalMatched = 0; - let totalTargets = 0; - let totalDetected = 0; - - for (const file of files) { - const imagePath = 
path.join(doubanDir, file); - const targetPath = path.join(doubanTargetDir, file); - const outputPath = path.join(outputDir, file); - - if (!fs.existsSync(imagePath)) continue; - - // 获取标准答案 - const targetBoxes = await validator.extractRedBoxes(targetPath); - - // 检测滑块 - const detected = await detector.detectSlider(imagePath, outputPath, true); - const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : []; - - // 转换格式 - const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({ - x: b.x, - y: b.y, - width: b.width, - height: b.height - })); - - // 验证 - const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10); - - totalMatched += result.matchedCount; - totalTargets += result.totalTargets; - totalDetected += result.detectedCount; - - console.log(`${file}:`); - console.log(` 目标: ${result.totalTargets}, 检测: ${result.detectedCount}, 匹配: ${result.matchedCount}`); - console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%, 召回率: ${(result.recall * 100).toFixed(1)}%`); - - if (result.matchedCount < result.totalTargets) { - console.log(` ⚠️ 漏检: ${result.totalTargets - result.matchedCount}个`); - } - if (result.unmatched.length > 0) { - console.log(` ⚠️ 误检: ${result.unmatched.length}个`); - } - } - - const overallPrecision = totalDetected > 0 ? (totalMatched / totalDetected * 100).toFixed(1) : '0.0'; - const overallRecall = totalTargets > 0 ? 
(totalMatched / totalTargets * 100).toFixed(1) : '0.0'; - - console.log(`\n总体统计:`); - console.log(` 总目标数: ${totalTargets}个`); - console.log(` 总检测数: ${totalDetected}个`); - console.log(` 成功匹配: ${totalMatched}个`); - console.log(` 准确率(Precision): ${overallPrecision}%`); - console.log(` 召回率(Recall): ${overallRecall}%`); - console.log(`\n输出目录: ${outputDir}`); -} - -main().catch(console.error); diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 0000000..7d9cd93 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,17 @@ +export interface Rectangle { + x: number; + y: number; + width: number; + height: number; +} + +export interface BoundingBox extends Rectangle { + score: number; +} + +export interface RawImage { + data: Buffer; + width: number; + height: number; + channels: number; +} diff --git a/src/utils/geometry.ts b/src/utils/geometry.ts new file mode 100644 index 0000000..04f3bbb --- /dev/null +++ b/src/utils/geometry.ts @@ -0,0 +1,16 @@ +import { Rectangle } from '../types'; + +/** + * Calculate intersection over union for two bounding boxes. + */ +export function calculateIoU(a: Rectangle, b: Rectangle): number { + const x1 = Math.max(a.x, b.x); + const y1 = Math.max(a.y, b.y); + const x2 = Math.min(a.x + a.width, b.x + b.width); + const y2 = Math.min(a.y + a.height, b.y + b.height); + + const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); + const union = a.width * a.height + b.width * b.height - intersection; + + return union === 0 ? 0 : intersection / union; +} diff --git a/src/utils/image.ts b/src/utils/image.ts new file mode 100644 index 0000000..2b1c37f --- /dev/null +++ b/src/utils/image.ts @@ -0,0 +1,136 @@ +import { RawImage } from '../types'; + +/** + * Convert RGB data to grayscale array. 
+ */ +function toGrayscale( + data: Buffer, + width: number, + height: number, + channels: number +): Uint8Array { + const gray = new Uint8Array(width * height); + for (let i = 0; i < width * height; i++) { + const idx = i * channels; + gray[i] = Math.round( + data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114 + ); + } + return gray; +} + +/** + * Produce a Sobel edge map from raw RGB data. + */ +export function createEdgeMap({ + data, + width, + height, + channels, +}: RawImage): Uint8Array { + const gray = toGrayscale(data, width, height, channels); + const edges = new Uint8Array(width * height); + + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const idx = y * width + x; + const gx = + -gray[(y - 1) * width + (x - 1)] + + gray[(y - 1) * width + (x + 1)] - + 2 * gray[idx - 1] + + 2 * gray[idx + 1] - + gray[(y + 1) * width + (x - 1)] + + gray[(y + 1) * width + (x + 1)]; + + const gy = + -gray[(y - 1) * width + (x - 1)] - + 2 * gray[(y - 1) * width + x] - + gray[(y - 1) * width + (x + 1)] + + gray[(y + 1) * width + (x - 1)] + + 2 * gray[(y + 1) * width + x] + + gray[(y + 1) * width + (x + 1)]; + + const magnitude = Math.sqrt(gx * gx + gy * gy); + edges[idx] = magnitude > 40 ? 1 : 0; + } + } + + return edges; +} + +/** + * Morphological closing (dilate followed by erode). 
+ */ +export function morphologyClose( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number +): Uint8Array { + const dilated = dilate(binary, width, height, kernelSize); + return erode(dilated, width, height, kernelSize); +} + +export function dilate( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number +): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let maxVal = 0; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + maxVal = Math.max(maxVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = maxVal; + } + } + + return result; +} + +export function erode( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number +): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let minVal = 1; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + minVal = Math.min(minVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = minVal; + } + } + + return result; +} + +export { toGrayscale }; diff --git a/src/validator.ts b/src/validator.ts index 44fc8de..c6f7fd4 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -1,110 +1,12 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -interface BoundingBox { - x: number; - y: number; - width: number; - height: number; -} +import { BoundingBox, Rectangle } from './types'; +import { calculateIoU } from './utils/geometry'; class SliderValidator { - 
/** - * 从标注图像中提取所有红框 - */ - async extractRedBoxes(imagePath: string): Promise { - const { data, info } = await sharp(imagePath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - // 创建红色像素地图 - const redMap = new Uint8Array(width * height); - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - // 检测红色像素 - redMap[y * width + x] = (r > 200 && g < 100 && b < 100) ? 1 : 0; - } - } - - // 使用连通区域分析找到所有红框 - const visited = new Uint8Array(width * height); - const boxes: BoundingBox[] = []; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = y * width + x; - if (visited[idx] === 0 && redMap[idx] === 1) { - const box = this.floodFill(redMap, visited, x, y, width, height); - if (box.width > 10 && box.height > 10) { - boxes.push(box); - } - } - } - } - - return boxes; - } - - /** - * 洪水填充算法 - */ - private floodFill( - binary: Uint8Array, - visited: Uint8Array, - startX: number, - startY: number, - width: number, - height: number - ): BoundingBox { - let minX = startX; - let minY = startY; - let maxX = startX; - let maxY = startY; - - const stack: Array<[number, number]> = [[startX, startY]]; - - while (stack.length > 0) { - const [x, y] = stack.pop()!; - - if (x < 0 || x >= width || y < 0 || y >= height) continue; - - const idx = y * width + x; - if (visited[idx] === 1) continue; - if (binary[idx] !== 1) continue; - - visited[idx] = 1; - - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - - stack.push([x + 1, y]); - stack.push([x - 1, y]); - stack.push([x, y + 1]); - stack.push([x, y - 1]); - } - - return { - x: minX, - y: minY, - width: maxX - minX + 1, - height: maxY - minY + 1 - }; - } /** * 检查两个框是否匹配(允许一定偏差) */ - isBoxMatching(detected: BoundingBox, target: BoundingBox, tolerance: number 
= 10): boolean { + isBoxMatching(detected: Rectangle, target: Rectangle, tolerance: number = 10): boolean { // 计算中心点 const detectedCenterX = detected.x + detected.width / 2; const detectedCenterY = detected.y + detected.height / 2; @@ -129,26 +31,16 @@ class SliderValidator { /** * 计算IoU(交并比) */ - calculateIoU(box1: BoundingBox, box2: BoundingBox): number { - const x1 = Math.max(box1.x, box2.x); - const y1 = Math.max(box1.y, box2.y); - const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); - const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); - - const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); - const box1Area = box1.width * box1.height; - const box2Area = box2.width * box2.height; - const unionArea = box1Area + box2Area - intersectionArea; - - return intersectionArea / unionArea; + calculateIoU(box1: Rectangle, box2: Rectangle): number { + return calculateIoU(box1, box2); } /** * 验证检测结果 */ async validateDetection( - detectedBoxes: BoundingBox[], - targetBoxes: BoundingBox[], + detectedBoxes: Rectangle[], + targetBoxes: Rectangle[], tolerance: number = 10 ): Promise<{ totalTargets: number; @@ -156,39 +48,39 @@ class SliderValidator { matchedCount: number; precision: number; recall: number; - matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }>; - unmatched: BoundingBox[]; + matches: Array<{ detected: Rectangle; target: Rectangle; iou: number }>; + unmatched: Rectangle[]; }> { - const matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }> = []; + const matches: Array<{ detected: Rectangle; target: Rectangle; iou: number }> = []; const matchedTargets = new Set(); const matchedDetected = new Set(); - // 为每个检测框找到最佳匹配的目标框 + // 1. 
找出所有可能的匹配对 + const potentialMatches: Array<{ detIdx: number; tarIdx: number; iou: number }> = []; for (let i = 0; i < detectedBoxes.length; i++) { - const detected = detectedBoxes[i]; - let bestMatch = -1; - let bestIoU = 0; - for (let j = 0; j < targetBoxes.length; j++) { - if (matchedTargets.has(j)) continue; - - if (this.isBoxMatching(detected, targetBoxes[j], tolerance)) { - const iou = this.calculateIoU(detected, targetBoxes[j]); - if (iou > bestIoU) { - bestIoU = iou; - bestMatch = j; + if (this.isBoxMatching(detectedBoxes[i], targetBoxes[j], tolerance)) { + const iou = this.calculateIoU(detectedBoxes[i], targetBoxes[j]); + if (iou > 0.1) { // 设置一个IoU的下限 + potentialMatches.push({ detIdx: i, tarIdx: j, iou }); } } } + } - if (bestMatch >= 0) { + // 2. 按IoU从高到低排序 + potentialMatches.sort((a, b) => b.iou - a.iou); + + // 3. 贪心选择最佳匹配 + for (const match of potentialMatches) { + if (!matchedDetected.has(match.detIdx) && !matchedTargets.has(match.tarIdx)) { matches.push({ - detected, - target: targetBoxes[bestMatch], - iou: bestIoU + detected: detectedBoxes[match.detIdx], + target: targetBoxes[match.tarIdx], + iou: match.iou }); - matchedTargets.add(bestMatch); - matchedDetected.add(i); + matchedDetected.add(match.detIdx); + matchedTargets.add(match.tarIdx); } } @@ -210,4 +102,4 @@ class SliderValidator { } } -export { SliderValidator, BoundingBox }; +export { SliderValidator, BoundingBox, Rectangle }; diff --git a/src/visualize.ts b/src/visualize.ts deleted file mode 100644 index d6e121e..0000000 --- a/src/visualize.ts +++ /dev/null @@ -1,98 +0,0 @@ -import sharp from 'sharp'; -import * as fs from 'fs'; -import * as path from 'path'; - -async function visualizeRedBox(imagePath: string, targetPath: string, outputPath: string) { - // 从target图像提取红框 - const { data, info } = await sharp(targetPath) - .raw() - .toBuffer({ resolveWithObject: true }); - - const { width, height, channels } = info; - - let minX = width, minY = height, maxX = 0, maxY = 0; - let foundRed = 
false; - - for (let y = 0; y < height; y++) { - for (let x = 0; x < width; x++) { - const idx = (y * width + x) * channels; - const r = data[idx]; - const g = data[idx + 1]; - const b = data[idx + 2]; - - if (r > 200 && g < 100 && b < 100) { - foundRed = true; - minX = Math.min(minX, x); - minY = Math.min(minY, y); - maxX = Math.max(maxX, x); - maxY = Math.max(maxY, y); - } - } - } - - if (!foundRed) { - console.log('No red box found'); - return; - } - - // 在原图上绘制红框 - const image = sharp(imagePath); - const metadata = await image.metadata(); - - const boxWidth = maxX - minX + 1; - const boxHeight = maxY - minY + 1; - - const svg = ` - - - - ${boxWidth}x${boxHeight} - - - `; - - await image - .composite([{ - input: Buffer.from(svg), - top: 0, - left: 0 - }]) - .toFile(outputPath); - - console.log(`Saved: ${path.basename(outputPath)}`); -} - -async function main() { - const baseDir = path.join(__dirname, '..'); - const doubanDir = path.join(baseDir, 'images', 'douban'); - const targetDir = path.join(baseDir, 'images', 'douban-target'); - const visualDir = path.join(baseDir, 'images', 'visual'); - - if (!fs.existsSync(visualDir)) { - fs.mkdirSync(visualDir, { recursive: true }); - } - - const files = fs.readdirSync(targetDir).filter(f => f.endsWith('.png')).slice(0, 9); - - for (const file of files) { - const imagePath = path.join(doubanDir, file); - const targetPath = path.join(targetDir, file); - const outputPath = path.join(visualDir, file); - - if (fs.existsSync(imagePath)) { - await visualizeRedBox(imagePath, targetPath, outputPath); - } - } - - console.log(`\nVisualization saved to: ${visualDir}`); -} - -main().catch(console.error);