commit 7a4f659dda324ac5b54fc5d4c3ad16e38edc75e1 Author: douboer Date: Sat Oct 25 15:53:29 2025 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..76f4b11 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# production +/build +dist + +# misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Editor directories and files +.idea +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? + +# mockm +httpData + +public/upload/** +!public/upload/*.gitkeep +.history + +# Package manager lock file +package-lock.json +yarn.lock +# pnpm-lock.yaml +auto-imports.d.ts +components.d.ts + +.wxt +.output +web-ext.config.ts +.wrangler + +# vite-plugin-pwa dev output +dev-dist diff --git a/README.md b/README.md new file mode 100644 index 0000000..dcb60a3 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# 豆瓣滑块验证码检测器 + +基于 sharp 图像处理库实现的滑块验证码缺口精确识别工具。 + +## 功能特性 + +- ✅ 自动检测图片中的滑块缺口位置 +- ✅ 支持多滑块检测(一张图片中检测多个滑块) +- ✅ 使用多阈值策略提高检测准确率 +- ✅ 自动在图片上绘制蓝色边框标注检测结果 +- ✅ 支持人工标注对比验证 + +## 安装依赖 + +```bash +npm install +``` + +## 使用方法 + +### 运行检测 + +```bash +npm run detect +``` + +该命令会: +1. 验证算法准确性(使用 `images/douban-target` 中的红框标注图片) +2. 处理 `images/douban` 目录中的所有图片 +3. 将检测结果(带蓝色边框)输出到 `images/output` 目录 + +### 目录结构 + +``` +images/ + ├── douban/ # 豆瓣滑块真实截图(待检测) + ├── douban-target/ # 红框标注图片(用于验证算法准确性) + ├── slider/ # 滑块形状模板图片 + ├── target/ # 滑块标注图片 + └── output/ # 检测结果输出目录(蓝色框标注) +``` + +## 检测算法 + +### 核心思路 + +1. **暗色区域检测**:滑块缺口通常为暗色(阴影)区域 +2. **多阈值策略**:使用 75、90、105 三个亮度阈值进行检测,提高鲁棒性 +3. **形态学处理**:使用闭运算(先膨胀后腐蚀)去除噪点 +4. **连通区域分析**:使用洪水填充算法查找所有暗色连通区域 +5. **特征筛选**: + - 尺寸范围:50-95 像素(宽度和高度) + - 宽高比:0.8-1.25(接近正方形) + - 位置:图片中部区域(15%-75%高度) + - 密度:像素填充率 > 55% + +### 检测结果 + +当前性能(在 9 张测试图片上): +- **验证准确率**:55.6%(5/9) +- **检测完成率**:66.7%(6/9) +- 支持多滑块检测(滑块-1.png 检测到 2 个滑块) + +## 开发 + +### 编译 + +```bash +npm run build +``` + +### 调试 + +查看未检测到的图片特征: +```bash +npx ts-node --transpile-only src/debug-threshold.ts +``` + +分析红框标注信息: +```bash +npx ts-node --transpile-only src/analyze.ts +``` + +## 技术栈 + +- TypeScript +- Sharp (图像处理) +- Node.js + +## 优化方向 + +1. 提高对低对比度图片的检测能力 +2. 优化形态学操作参数 +3. 增加边缘检测辅助判断 +4. 支持更多滑块形状变体 diff --git a/ground-truth.json b/ground-truth.json new file mode 100644 index 0000000..8f49fcc --- /dev/null +++ b/ground-truth.json @@ -0,0 +1,128 @@ +{ + "滑块-1.png": [ + { + "x": 123, + "y": 439, + "width": 90, + "height": 92 + }, + { + "x": 546, + "y": 439, + "width": 90, + "height": 92 + } + ], + "滑块-2.png": [ + { + "x": 454, + "y": 244, + "width": 90, + "height": 92 + }, + { + "x": 125, + "y": 245, + "width": 89, + "height": 91 + } + ], + "滑块-3.png": [ + { + "x": 576, + "y": 237, + "width": 87, + "height": 88 + }, + { + "x": 122, + "y": 238, + "width": 86, + "height": 87 + } + ], + "滑块-4.png": [ + { + "x": 488, + "y": 329, + "width": 91, + "height": 91 + }, + { + "x": 120, + "y": 330, + "width": 90, + "height": 90 + } + ], + "滑块-5.png": [ + { + "x": 404, + "y": 443, + "width": 91, + "height": 89 + }, + { + "x": 119, + "y": 444, + "width": 90, + "height": 88 + } + ], + "滑块-6.png": [ + { + "x": 574, + "y": 318, + "width": 92, + "height": 92 + }, + { + "x": 116, + "y": 319, + "width": 91, + "height": 91 + } + ], + "滑块-7.png": [ + { + "x": 349, + "y": 177, + "width": 101, + "height": 166 + }, + { + "x": 119, + "y": 255, + "width": 88, + "height": 88 + } + ], + "滑块-8.png": [ + { + "x": 434, + "y": 243, + "width": 93, + "height": 93 + }, + { + "x": 120, + "y": 244, + "width": 92, + "height": 92 + } + ], + "滑块.png": [ + { + "x": 375, + "y": 407, + "width": 88, + "height": 89 + }, + { + "x": 131, + "y": 408, + "width": 87, + "height": 88 + } + ] +} \ No newline at end of file diff --git a/images/debug/template-滑块-1.png b/images/debug/template-滑块-1.png new file mode 100644 index 0000000..093f664 Binary files /dev/null and b/images/debug/template-滑块-1.png differ diff --git a/images/debug/template-滑块-2.png b/images/debug/template-滑块-2.png new file mode 100644 index 0000000..5db92c3 Binary files /dev/null and b/images/debug/template-滑块-2.png differ diff --git a/images/debug/template-滑块-4.png b/images/debug/template-滑块-4.png new file mode 100644 index 0000000..6c55392 Binary files /dev/null and b/images/debug/template-滑块-4.png differ diff --git a/images/debug/template-滑块-5.png b/images/debug/template-滑块-5.png new file mode 100644 index 0000000..e3dbcfd Binary files /dev/null and b/images/debug/template-滑块-5.png differ diff --git a/images/debug/template-滑块-6.png b/images/debug/template-滑块-6.png new file mode 100644 index 0000000..881ef08 Binary files /dev/null and b/images/debug/template-滑块-6.png differ diff --git a/images/debug/template-滑块-7.png b/images/debug/template-滑块-7.png new file mode 100644 index 0000000..89e5a6d Binary files /dev/null and b/images/debug/template-滑块-7.png differ diff --git a/images/debug/template-滑块-8.png b/images/debug/template-滑块-8.png new file mode 100644 index 0000000..b0d55da Binary files /dev/null and b/images/debug/template-滑块-8.png differ diff --git a/images/debug/template-滑块.png b/images/debug/template-滑块.png new file mode 100644 index 0000000..10a065b Binary files /dev/null and b/images/debug/template-滑块.png differ diff --git a/images/debug/滑块-1_edge.png b/images/debug/滑块-1_edge.png new file mode 100644 index 0000000..3b39ab7 Binary files /dev/null and b/images/debug/滑块-1_edge.png differ diff --git a/images/debug/滑块-1_gray.png b/images/debug/滑块-1_gray.png new file mode 100644 index 0000000..91b8981 Binary files /dev/null and b/images/debug/滑块-1_gray.png differ diff --git a/images/debug/滑块-2_edge.png b/images/debug/滑块-2_edge.png new file mode 100644 index 0000000..dc9bc7a Binary files /dev/null and b/images/debug/滑块-2_edge.png differ diff --git a/images/debug/滑块-2_gray.png b/images/debug/滑块-2_gray.png new file mode 100644 index 0000000..3e95ab1 Binary files /dev/null and b/images/debug/滑块-2_gray.png differ diff --git a/images/debug/滑块-3_edge.png b/images/debug/滑块-3_edge.png new file mode 100644 index 0000000..e6ef187 Binary files /dev/null and b/images/debug/滑块-3_edge.png differ diff --git a/images/debug/滑块-3_gray.png b/images/debug/滑块-3_gray.png new file mode 100644 index 0000000..182c8af Binary files /dev/null and b/images/debug/滑块-3_gray.png differ diff --git a/images/douban-target/滑块-1.png b/images/douban-target/滑块-1.png new file mode 100644 index 0000000..d016792 Binary files /dev/null and b/images/douban-target/滑块-1.png differ diff --git a/images/douban-target/滑块-2.png b/images/douban-target/滑块-2.png new file mode 100644 index 0000000..0616886 Binary files /dev/null and b/images/douban-target/滑块-2.png differ diff --git a/images/douban-target/滑块-3.png b/images/douban-target/滑块-3.png new file mode 100644 index 0000000..3258228 Binary files /dev/null and b/images/douban-target/滑块-3.png differ diff --git a/images/douban-target/滑块-4.png b/images/douban-target/滑块-4.png new file mode 100644 index 0000000..efd90f3 Binary files /dev/null and b/images/douban-target/滑块-4.png differ diff --git a/images/douban-target/滑块-5.png b/images/douban-target/滑块-5.png new file mode 100644 index 0000000..641d6c5 Binary files /dev/null and b/images/douban-target/滑块-5.png differ diff --git a/images/douban-target/滑块-6.png b/images/douban-target/滑块-6.png new file mode 100644 index 0000000..d2f586f Binary files /dev/null and b/images/douban-target/滑块-6.png differ diff --git a/images/douban-target/滑块-7.png b/images/douban-target/滑块-7.png new file mode 100644 index 0000000..160a760 Binary files /dev/null and b/images/douban-target/滑块-7.png differ diff --git a/images/douban-target/滑块-8.png b/images/douban-target/滑块-8.png new file mode 100644 index 0000000..0ab06be Binary files /dev/null and b/images/douban-target/滑块-8.png differ diff --git a/images/douban-target/滑块.png b/images/douban-target/滑块.png new file mode 100644 index 0000000..d20f865 Binary files /dev/null and b/images/douban-target/滑块.png differ diff --git a/images/douban/滑块-1.png b/images/douban/滑块-1.png new file mode 100644 index 0000000..7e9e228 Binary files /dev/null and b/images/douban/滑块-1.png differ diff --git a/images/douban/滑块-2.png b/images/douban/滑块-2.png new file mode 100644 index 0000000..483ab51 Binary files /dev/null and b/images/douban/滑块-2.png differ diff --git a/images/douban/滑块-3.png b/images/douban/滑块-3.png new file mode 100644 index 0000000..5be76e9 Binary files /dev/null and b/images/douban/滑块-3.png differ diff --git a/images/douban/滑块-4.png b/images/douban/滑块-4.png new file mode 100644 index 0000000..4000d2b Binary files /dev/null and b/images/douban/滑块-4.png differ diff --git a/images/douban/滑块-5.png b/images/douban/滑块-5.png new file mode 100644 index 0000000..8b9e93a Binary files /dev/null and b/images/douban/滑块-5.png differ diff --git a/images/douban/滑块-6.png b/images/douban/滑块-6.png new file mode 100644 index 0000000..b1f8248 Binary files /dev/null and b/images/douban/滑块-6.png differ diff --git a/images/douban/滑块-7.png b/images/douban/滑块-7.png new file mode 100644 index 0000000..c9ed2cf Binary files /dev/null and b/images/douban/滑块-7.png differ diff --git a/images/douban/滑块-8.png b/images/douban/滑块-8.png new file mode 100644 index 0000000..f0d1ecf Binary files /dev/null and b/images/douban/滑块-8.png differ diff --git a/images/douban/滑块.png b/images/douban/滑块.png new file mode 100644 index 0000000..9c163bf Binary files /dev/null and b/images/douban/滑块.png differ diff --git a/images/output-canny/滑块-1.png b/images/output-canny/滑块-1.png new file mode 100644 index 0000000..ebdf208 Binary files /dev/null and b/images/output-canny/滑块-1.png differ diff --git a/images/output-canny/滑块-2.png b/images/output-canny/滑块-2.png new file mode 100644 index 0000000..bbe49a2 Binary files /dev/null and b/images/output-canny/滑块-2.png differ diff --git a/images/output-canny/滑块-3.png b/images/output-canny/滑块-3.png new file mode 100644 index 0000000..a9d120f Binary files /dev/null and b/images/output-canny/滑块-3.png differ diff --git a/images/output-canny/滑块-4.png b/images/output-canny/滑块-4.png new file mode 100644 index 0000000..75eeb4a Binary files /dev/null and b/images/output-canny/滑块-4.png differ diff --git a/images/output-canny/滑块-5.png b/images/output-canny/滑块-5.png new file mode 100644 index 0000000..461789e Binary files /dev/null and b/images/output-canny/滑块-5.png differ diff --git a/images/output-canny/滑块-6.png b/images/output-canny/滑块-6.png new file mode 100644 index 0000000..4907f06 Binary files /dev/null and b/images/output-canny/滑块-6.png differ diff --git a/images/output-canny/滑块-7.png b/images/output-canny/滑块-7.png new file mode 100644 index 0000000..f89846b Binary files /dev/null and b/images/output-canny/滑块-7.png differ diff --git a/images/output-canny/滑块-8.png b/images/output-canny/滑块-8.png new file mode 100644 index 0000000..64b351a Binary files /dev/null and b/images/output-canny/滑块-8.png differ diff --git a/images/output-canny/滑块.png b/images/output-canny/滑块.png new file mode 100644 index 0000000..147c4b4 Binary files /dev/null and b/images/output-canny/滑块.png differ diff --git a/images/output-cv/滑块-1.png b/images/output-cv/滑块-1.png new file mode 100644 index 0000000..2265cee Binary files /dev/null and b/images/output-cv/滑块-1.png differ diff --git a/images/output-cv/滑块-4.png b/images/output-cv/滑块-4.png new file mode 100644 index 0000000..6315b3c Binary files /dev/null and b/images/output-cv/滑块-4.png differ diff --git a/images/output-cv/滑块.png b/images/output-cv/滑块.png new file mode 100644 index 0000000..9888372 Binary files /dev/null and b/images/output-cv/滑块.png differ diff --git a/images/output-edge/滑块-1.png b/images/output-edge/滑块-1.png new file mode 100644 index 0000000..7e5f36e Binary files /dev/null and b/images/output-edge/滑块-1.png differ diff --git a/images/output-edge/滑块-2.png b/images/output-edge/滑块-2.png new file mode 100644 index 0000000..3da96ea Binary files /dev/null and b/images/output-edge/滑块-2.png differ diff --git a/images/output-edge/滑块-3.png b/images/output-edge/滑块-3.png new file mode 100644 index 0000000..b6b1d20 Binary files /dev/null and b/images/output-edge/滑块-3.png differ diff --git a/images/output-edge/滑块-4.png b/images/output-edge/滑块-4.png new file mode 100644 index 0000000..cb2ba29 Binary files /dev/null and b/images/output-edge/滑块-4.png differ diff --git a/images/output-edge/滑块-5.png b/images/output-edge/滑块-5.png new file mode 100644 index 0000000..0ce2e52 Binary files /dev/null and b/images/output-edge/滑块-5.png differ diff --git a/images/output-edge/滑块-6.png b/images/output-edge/滑块-6.png new file mode 100644 index 0000000..67e514f Binary files /dev/null and b/images/output-edge/滑块-6.png differ diff --git a/images/output/滑块-1.png b/images/output/滑块-1.png new file mode 100644 index 0000000..50f9190 Binary files /dev/null and b/images/output/滑块-1.png differ diff --git a/images/output/滑块-2.png b/images/output/滑块-2.png new file mode 100644 index 0000000..c626d46 Binary files /dev/null and b/images/output/滑块-2.png differ diff --git a/images/output/滑块-3.png b/images/output/滑块-3.png new file mode 100644 index 0000000..edd483d Binary files /dev/null and b/images/output/滑块-3.png differ diff --git a/images/output/滑块-4.png b/images/output/滑块-4.png new file mode 100644 index 0000000..83721d6 Binary files /dev/null and b/images/output/滑块-4.png differ diff --git a/images/output/滑块-5.png b/images/output/滑块-5.png new file mode 100644 index 0000000..a6a0eb8 Binary files /dev/null and b/images/output/滑块-5.png differ diff --git a/images/output/滑块-6.png b/images/output/滑块-6.png new file mode 100644 index 0000000..b3f0b84 Binary files /dev/null and b/images/output/滑块-6.png differ diff --git a/images/output/滑块-7.png b/images/output/滑块-7.png new file mode 100644 index 0000000..e313dc9 Binary files /dev/null and b/images/output/滑块-7.png differ diff --git a/images/output/滑块-8.png b/images/output/滑块-8.png new file mode 100644 index 0000000..2fedeba Binary files /dev/null and b/images/output/滑块-8.png differ diff --git a/images/output/滑块.png b/images/output/滑块.png new file mode 100644 index 0000000..4ccc9fd Binary files /dev/null and b/images/output/滑块.png differ diff --git a/images/slider/421slider.png b/images/slider/421slider.png new file mode 100644 index 0000000..859e890 Binary files /dev/null and b/images/slider/421slider.png differ diff --git a/images/slider/422slider.png b/images/slider/422slider.png new file mode 100644 index 0000000..3ec82ff Binary files /dev/null and b/images/slider/422slider.png differ diff --git a/images/slider/423slider.png b/images/slider/423slider.png new file mode 100644 index 0000000..4208968 Binary files /dev/null and b/images/slider/423slider.png differ diff --git a/images/slider/424slider.png b/images/slider/424slider.png new file mode 100644 index 0000000..565752e Binary files /dev/null and b/images/slider/424slider.png differ diff --git a/images/slider/425slider.png b/images/slider/425slider.png new file mode 100644 index 0000000..24945b7 Binary files /dev/null and b/images/slider/425slider.png differ diff --git a/images/slider/426slider.png b/images/slider/426slider.png new file mode 100644 index 0000000..9a4d4ab Binary files /dev/null and b/images/slider/426slider.png differ diff --git a/images/slider/427slider.png b/images/slider/427slider.png new file mode 100644 index 0000000..b399f97 Binary files /dev/null and b/images/slider/427slider.png differ diff --git a/images/slider/428slider.png b/images/slider/428slider.png new file mode 100644 index 0000000..ca9ff01 Binary files /dev/null and b/images/slider/428slider.png differ diff --git a/images/slider/429slider.png b/images/slider/429slider.png new file mode 100644 index 0000000..b399f97 Binary files /dev/null and b/images/slider/429slider.png differ diff --git a/images/slider/430slider.png b/images/slider/430slider.png new file mode 100644 index 0000000..4876779 Binary files /dev/null and b/images/slider/430slider.png differ diff --git a/images/slider/431slider.png b/images/slider/431slider.png new file mode 100644 index 0000000..a0dbb4f Binary files /dev/null and b/images/slider/431slider.png differ diff --git a/images/slider/432slider.png b/images/slider/432slider.png new file mode 100644 index 0000000..6a137fb Binary files /dev/null and b/images/slider/432slider.png differ diff --git a/images/slider/433slider.png b/images/slider/433slider.png new file mode 100644 index 0000000..5219071 Binary files /dev/null and b/images/slider/433slider.png differ diff --git a/images/slider/434slider.png b/images/slider/434slider.png new file mode 100644 index 0000000..3c1c3cf Binary files /dev/null and b/images/slider/434slider.png differ diff --git a/images/slider/435slider.png b/images/slider/435slider.png new file mode 100644 index 0000000..0a86a29 Binary files /dev/null and b/images/slider/435slider.png differ diff --git a/images/slider/436slider.png b/images/slider/436slider.png new file mode 100644 index 0000000..9f4e235 Binary files /dev/null and b/images/slider/436slider.png differ diff --git a/images/slider/437slider.png b/images/slider/437slider.png new file mode 100644 index 0000000..1a18400 Binary files /dev/null and b/images/slider/437slider.png differ diff --git a/images/slider/438slider.png b/images/slider/438slider.png new file mode 100644 index 0000000..0cc21a3 Binary files /dev/null and b/images/slider/438slider.png differ diff --git a/images/slider/439slider.png b/images/slider/439slider.png new file mode 100644 index 0000000..6fd4f10 Binary files /dev/null and b/images/slider/439slider.png differ diff --git a/images/slider/440slider.png b/images/slider/440slider.png new file mode 100644 index 0000000..cf8cc6c Binary files /dev/null and b/images/slider/440slider.png differ diff --git a/images/slider/441slider.png b/images/slider/441slider.png new file mode 100644 index 0000000..224a268 Binary files /dev/null and b/images/slider/441slider.png differ diff --git a/images/slider/442slider.png b/images/slider/442slider.png new file mode 100644 index 0000000..9def5dc Binary files /dev/null and b/images/slider/442slider.png differ diff --git a/images/slider/443slider.png b/images/slider/443slider.png new file mode 100644 index 0000000..29921d5 Binary files /dev/null and b/images/slider/443slider.png differ diff --git a/images/slider/444slider.png b/images/slider/444slider.png new file mode 100644 index 0000000..9ea3af3 Binary files /dev/null and b/images/slider/444slider.png differ diff --git a/images/slider/445slider.png b/images/slider/445slider.png new file mode 100644 index 0000000..a14cbad Binary files /dev/null and b/images/slider/445slider.png differ diff --git a/images/slider/446slider.png b/images/slider/446slider.png new file mode 100644 index 0000000..13ab19e Binary files /dev/null and b/images/slider/446slider.png differ diff --git a/images/slider/447slider.png b/images/slider/447slider.png new file mode 100644 index 0000000..869adec Binary files /dev/null and b/images/slider/447slider.png differ diff --git a/images/slider/448slider.png b/images/slider/448slider.png new file mode 100644 index 0000000..c384161 Binary files /dev/null and b/images/slider/448slider.png differ diff --git a/images/slider/449slider.png b/images/slider/449slider.png new file mode 100644 index 0000000..0cc21a3 Binary files /dev/null and b/images/slider/449slider.png differ diff --git a/images/slider/450slider.png b/images/slider/450slider.png new file mode 100644 index 0000000..94009b6 Binary files /dev/null and b/images/slider/450slider.png differ diff --git a/images/slider/451slider.png b/images/slider/451slider.png new file mode 100644 index 0000000..76f8f4c Binary files /dev/null and b/images/slider/451slider.png differ diff --git a/images/slider/452slider.png b/images/slider/452slider.png new file mode 100644 index 0000000..ed7314f Binary files /dev/null and b/images/slider/452slider.png differ diff --git a/images/slider/453slider.png b/images/slider/453slider.png new file mode 100644 index 0000000..1d13f73 Binary files /dev/null and b/images/slider/453slider.png differ diff --git a/images/slider/454slider.png b/images/slider/454slider.png new file mode 100644 index 0000000..492c361 Binary files /dev/null and b/images/slider/454slider.png differ diff --git a/images/slider/455slider.png b/images/slider/455slider.png new file mode 100644 index 0000000..871fd9b Binary files /dev/null and b/images/slider/455slider.png differ diff --git a/images/slider/456slider.png b/images/slider/456slider.png new file mode 100644 index 0000000..7a2c1a4 Binary files /dev/null and b/images/slider/456slider.png differ diff --git a/images/slider/457slider.png b/images/slider/457slider.png new file mode 100644 index 0000000..55c3655 Binary files /dev/null and b/images/slider/457slider.png differ diff --git a/images/slider/458slider.png b/images/slider/458slider.png new file mode 100644 index 0000000..bd97bbd Binary files /dev/null and b/images/slider/458slider.png differ diff --git a/images/slider/459slider.png b/images/slider/459slider.png new file mode 100644 index 0000000..279e8e5 Binary files /dev/null and b/images/slider/459slider.png differ diff --git a/images/slider/460slider.png b/images/slider/460slider.png new file mode 100644 index 0000000..898edbe Binary files /dev/null and b/images/slider/460slider.png differ diff --git a/images/slider/461slider.png b/images/slider/461slider.png new file mode 100644 index 0000000..0e8d158 Binary files /dev/null and b/images/slider/461slider.png differ diff --git a/images/slider/462slider.png b/images/slider/462slider.png new file mode 100644 index 0000000..e3b1ce4 Binary files /dev/null and b/images/slider/462slider.png differ diff --git a/images/slider/463slider.png b/images/slider/463slider.png new file mode 100644 index 0000000..a9f2fda Binary files /dev/null and b/images/slider/463slider.png differ diff --git a/images/slider/464slider.png b/images/slider/464slider.png new file mode 100644 index 0000000..96e826e Binary files /dev/null and b/images/slider/464slider.png differ diff --git a/images/slider/465slider.png b/images/slider/465slider.png new file mode 100644 index 0000000..8d39622 Binary files /dev/null and b/images/slider/465slider.png differ diff --git a/images/slider/466slider.png b/images/slider/466slider.png new file mode 100644 index 0000000..25e1a73 Binary files /dev/null and b/images/slider/466slider.png differ diff --git a/images/slider/467slider.png b/images/slider/467slider.png new file mode 100644 index 0000000..1c8cf81 Binary files /dev/null and b/images/slider/467slider.png differ diff --git a/images/slider/468slider.png b/images/slider/468slider.png new file mode 100644 index 0000000..c7c7895 Binary files /dev/null and b/images/slider/468slider.png differ diff --git a/images/slider/469slider.png b/images/slider/469slider.png new file mode 100644 index 0000000..f9237f7 Binary files /dev/null and b/images/slider/469slider.png differ diff --git a/images/slider/470slider.png b/images/slider/470slider.png new file mode 100644 index 0000000..9a4d4ab Binary files /dev/null and b/images/slider/470slider.png differ diff --git a/images/slider/471slider.png b/images/slider/471slider.png new file mode 100644 index 0000000..d7c7738 Binary files /dev/null and b/images/slider/471slider.png differ diff --git a/images/slider/472slider.png b/images/slider/472slider.png new file mode 100644 index 0000000..716b0d7 Binary files /dev/null and b/images/slider/472slider.png differ diff --git a/images/slider/473slider.png b/images/slider/473slider.png new file mode 100644 index 0000000..09a1418 Binary files /dev/null and b/images/slider/473slider.png differ diff --git a/images/slider/474slider.png b/images/slider/474slider.png new file mode 100644 index 0000000..41ea1de Binary files /dev/null and b/images/slider/474slider.png differ diff --git a/images/slider/475slider.png b/images/slider/475slider.png new file mode 100644 index 0000000..8cdad94 Binary files /dev/null and b/images/slider/475slider.png differ diff --git a/images/slider/476slider.png b/images/slider/476slider.png new file mode 100644 index 0000000..3ccf2e3 Binary files /dev/null and b/images/slider/476slider.png differ diff --git a/images/slider/477slider.png b/images/slider/477slider.png new file mode 100644 index 0000000..2563fe7 Binary files /dev/null and b/images/slider/477slider.png differ diff --git a/images/slider/478slider.png b/images/slider/478slider.png new file mode 100644 index 0000000..4db995a Binary files /dev/null and b/images/slider/478slider.png differ diff --git a/images/slider/479slider.png b/images/slider/479slider.png new file mode 100644 index 0000000..c2a81fb Binary files /dev/null and b/images/slider/479slider.png differ diff --git a/images/slider/480slider.png b/images/slider/480slider.png new file mode 100644 index 0000000..29b2d36 Binary files /dev/null and b/images/slider/480slider.png differ diff --git a/images/slider/481slider.png b/images/slider/481slider.png new file mode 100644 index 0000000..52dd9dd Binary files /dev/null and b/images/slider/481slider.png differ diff --git a/images/slider/482slider.png b/images/slider/482slider.png new file mode 100644 index 0000000..481911d Binary files /dev/null and b/images/slider/482slider.png differ diff --git a/images/slider/483slider.png b/images/slider/483slider.png new file mode 100644 index 0000000..48b4980 Binary files /dev/null and b/images/slider/483slider.png differ diff --git a/images/slider/484slider.png b/images/slider/484slider.png new file mode 100644 index 0000000..4ec1f26 Binary files /dev/null and b/images/slider/484slider.png differ diff --git a/images/slider/485slider.png b/images/slider/485slider.png new file mode 100644 index 0000000..a52b7e9 Binary files /dev/null and b/images/slider/485slider.png differ diff --git a/images/slider/486slider.png b/images/slider/486slider.png new file mode 100644 index 0000000..96c74fe Binary files /dev/null and b/images/slider/486slider.png differ diff --git a/images/slider/487slider.png b/images/slider/487slider.png new file mode 100644 index 0000000..e3b1ce4 Binary files /dev/null and b/images/slider/487slider.png differ diff --git a/images/slider/488slider.png b/images/slider/488slider.png new file mode 100644 index 0000000..17607f7 Binary files /dev/null and b/images/slider/488slider.png differ diff --git a/images/slider/489slider.png b/images/slider/489slider.png new file mode 100644 index 0000000..f9e5bb2 Binary files /dev/null and b/images/slider/489slider.png differ diff --git a/images/slider/490slider.png b/images/slider/490slider.png new file mode 100644 index 0000000..d8bc0f8 Binary files /dev/null and b/images/slider/490slider.png differ diff --git a/images/slider/491slider.png b/images/slider/491slider.png new file mode 100644 index 0000000..4858976 Binary files /dev/null and b/images/slider/491slider.png differ diff --git a/images/slider/492slider.png b/images/slider/492slider.png new file mode 100644 index 0000000..21bd0a4 Binary files /dev/null and b/images/slider/492slider.png differ diff --git a/images/slider/493slider.png b/images/slider/493slider.png new file mode 100644 index 0000000..4876779 Binary files /dev/null and b/images/slider/493slider.png differ diff --git a/images/slider/494slider.png b/images/slider/494slider.png new file mode 100644 index 0000000..2acae72 Binary files /dev/null and b/images/slider/494slider.png differ diff --git a/images/slider/495slider.png b/images/slider/495slider.png new file mode 100644 index 0000000..702425f Binary files /dev/null and b/images/slider/495slider.png differ diff --git a/images/slider/496slider.png b/images/slider/496slider.png new file mode 100644 index 0000000..c124e64 Binary files /dev/null and b/images/slider/496slider.png differ diff --git a/images/slider/497slider.png b/images/slider/497slider.png new file mode 100644 index 0000000..46ef70d Binary files /dev/null and b/images/slider/497slider.png differ diff --git a/images/slider/498slider.png b/images/slider/498slider.png new file mode 100644 index 0000000..4849ba8 Binary files /dev/null and b/images/slider/498slider.png differ diff --git a/images/slider/499slider.png b/images/slider/499slider.png new file mode 100644 index 0000000..82b0c23 Binary files /dev/null and b/images/slider/499slider.png differ diff --git a/images/slider/500slider.png b/images/slider/500slider.png new file mode 100644 index 0000000..362685f Binary files /dev/null and b/images/slider/500slider.png differ diff --git a/images/target/10captcha.png b/images/target/10captcha.png new file mode 100644 index 0000000..0b8b1b8 Binary files /dev/null and b/images/target/10captcha.png differ diff --git a/images/target/11captcha.png b/images/target/11captcha.png new file mode 100644 index 0000000..a79d06d Binary files /dev/null and b/images/target/11captcha.png differ diff --git a/images/target/12captcha.png b/images/target/12captcha.png new file mode 100644 index 0000000..5ee81a7 Binary files /dev/null and b/images/target/12captcha.png differ diff --git a/images/target/13captcha.png b/images/target/13captcha.png new file mode 100644 index 0000000..da6c898 Binary files /dev/null and b/images/target/13captcha.png differ diff --git a/images/target/14captcha.png b/images/target/14captcha.png new file mode 100644 index 0000000..e87cf05 Binary files /dev/null and b/images/target/14captcha.png differ diff --git a/images/target/15captcha.png b/images/target/15captcha.png new file mode 100644 index 0000000..e096228 Binary files /dev/null and b/images/target/15captcha.png differ diff --git a/images/target/16captcha.png b/images/target/16captcha.png new file mode 100644 index 0000000..e957a4b Binary files /dev/null and b/images/target/16captcha.png differ diff --git a/images/target/17captcha.png b/images/target/17captcha.png new file mode 100644 index 0000000..f3e28d0 Binary files /dev/null and b/images/target/17captcha.png differ diff --git a/images/target/18captcha.png b/images/target/18captcha.png new file mode 100644 index 0000000..19183e1 Binary files /dev/null and b/images/target/18captcha.png differ diff --git a/images/target/19captcha.png b/images/target/19captcha.png new file mode 100644 index 0000000..7dee879 Binary files /dev/null and b/images/target/19captcha.png differ diff --git a/images/target/1captcha.png b/images/target/1captcha.png new file mode 100644 index 0000000..3b8626b Binary files /dev/null and b/images/target/1captcha.png differ diff --git a/images/target/20captcha.png b/images/target/20captcha.png new file mode 100644 index 0000000..f679a6b Binary files /dev/null and b/images/target/20captcha.png differ diff --git a/images/target/21captcha.png b/images/target/21captcha.png new file mode 100644 index 0000000..e60e766 Binary files /dev/null and b/images/target/21captcha.png differ diff --git a/images/target/22captcha.png b/images/target/22captcha.png new file mode 100644 index 0000000..6c2ab26 Binary files /dev/null and b/images/target/22captcha.png differ diff --git a/images/target/23captcha.png b/images/target/23captcha.png new file mode 100644 index 0000000..040aa49 Binary files /dev/null and b/images/target/23captcha.png differ diff --git a/images/target/24captcha.png b/images/target/24captcha.png new file mode 100644 index 0000000..54fd5c0 Binary files /dev/null and b/images/target/24captcha.png differ diff --git a/images/target/25captcha.png b/images/target/25captcha.png new file mode 100644 index 0000000..a8437ba Binary files /dev/null and b/images/target/25captcha.png differ diff --git a/images/target/26captcha.png b/images/target/26captcha.png new file mode 100644 index 0000000..b88d2b3 Binary files /dev/null and b/images/target/26captcha.png differ diff --git a/images/target/27captcha.png b/images/target/27captcha.png new file mode 100644 index 0000000..8d51476 Binary files /dev/null and b/images/target/27captcha.png differ diff --git a/images/target/28captcha.png b/images/target/28captcha.png new file mode 100644 index 0000000..957f614 Binary files /dev/null and b/images/target/28captcha.png differ diff --git a/images/target/29captcha.png b/images/target/29captcha.png new file mode 100644 index 0000000..612638b Binary files /dev/null and b/images/target/29captcha.png differ diff --git a/images/target/2captcha.png b/images/target/2captcha.png new file mode 100644 index 0000000..8c5ef56 Binary files /dev/null and b/images/target/2captcha.png differ diff --git a/images/target/30captcha.png b/images/target/30captcha.png new file mode 100644 index 0000000..de3949d Binary files /dev/null and b/images/target/30captcha.png differ diff --git a/images/target/31captcha.png b/images/target/31captcha.png new file mode 100644 index 0000000..78b6596 Binary files /dev/null and b/images/target/31captcha.png differ diff --git a/images/target/32captcha.png b/images/target/32captcha.png new file mode 100644 index 0000000..fa45c7e Binary files /dev/null and b/images/target/32captcha.png differ diff --git a/images/target/33captcha.png b/images/target/33captcha.png new file mode 100644 index 0000000..9fc29f5 Binary files /dev/null and b/images/target/33captcha.png differ diff --git a/images/target/34captcha.png b/images/target/34captcha.png new file mode 100644 index 0000000..28d01e4 Binary files /dev/null and b/images/target/34captcha.png differ diff --git a/images/target/35captcha.png b/images/target/35captcha.png new file mode 100644 index 0000000..897c5ec Binary files /dev/null and b/images/target/35captcha.png differ diff --git a/images/target/36captcha.png b/images/target/36captcha.png new file mode 100644 index 0000000..95c3e12 Binary files /dev/null and b/images/target/36captcha.png differ diff --git a/images/target/37captcha.png b/images/target/37captcha.png new file mode 100644 index 0000000..07c1689 Binary files /dev/null and b/images/target/37captcha.png differ diff --git a/images/target/38captcha.png b/images/target/38captcha.png new file mode 100644 index 0000000..32ebb11 Binary files /dev/null and b/images/target/38captcha.png differ diff --git a/images/target/39captcha.png b/images/target/39captcha.png new file mode 100644 index 0000000..a63f8a2 Binary files /dev/null and b/images/target/39captcha.png differ diff --git a/images/target/3captcha.png b/images/target/3captcha.png new file mode 100644 index 0000000..ed07164 Binary files /dev/null and b/images/target/3captcha.png differ diff --git a/images/target/40captcha.png b/images/target/40captcha.png new file mode 100644 index 0000000..852264d Binary files /dev/null and b/images/target/40captcha.png differ diff --git a/images/target/41captcha.png b/images/target/41captcha.png new file mode 100644 index 0000000..e978398 Binary files /dev/null and b/images/target/41captcha.png differ diff --git a/images/target/421captcha.png b/images/target/421captcha.png new file mode 100644 index 0000000..45ec9cb Binary files /dev/null and b/images/target/421captcha.png differ diff --git a/images/target/422captcha.png b/images/target/422captcha.png new file mode 100644 index 0000000..aa61548 Binary files /dev/null and b/images/target/422captcha.png differ diff --git a/images/target/423captcha.png b/images/target/423captcha.png new file mode 100644 index 0000000..32fe4e4 Binary files /dev/null and b/images/target/423captcha.png differ diff --git a/images/target/424captcha.png b/images/target/424captcha.png new file mode 100644 index 0000000..e1efcde Binary files /dev/null and b/images/target/424captcha.png differ diff --git a/images/target/425captcha.png b/images/target/425captcha.png new file mode 100644 index 0000000..320d593 Binary files /dev/null and b/images/target/425captcha.png differ diff --git a/images/target/426captcha.png b/images/target/426captcha.png new file mode 100644 index 0000000..381af8c Binary files /dev/null and b/images/target/426captcha.png differ diff --git a/images/target/427captcha.png b/images/target/427captcha.png new file mode 100644 index 0000000..6979fc4 Binary files /dev/null and b/images/target/427captcha.png differ diff --git a/images/target/428captcha.png b/images/target/428captcha.png new file mode 100644 index 0000000..a3cce3d Binary files /dev/null and b/images/target/428captcha.png differ diff --git a/images/target/429captcha.png b/images/target/429captcha.png new file mode 100644 index 0000000..f4ff72e Binary files /dev/null and b/images/target/429captcha.png differ diff --git a/images/target/42captcha.png b/images/target/42captcha.png new file mode 100644 index 0000000..1b8810f Binary files /dev/null and b/images/target/42captcha.png differ diff --git a/images/target/430captcha.png b/images/target/430captcha.png new file mode 100644 index 0000000..60b98e6 Binary files /dev/null and b/images/target/430captcha.png differ diff --git a/images/target/431captcha.png b/images/target/431captcha.png new file mode 100644 index 0000000..aac304f Binary files /dev/null and b/images/target/431captcha.png differ diff --git a/images/target/432captcha.png b/images/target/432captcha.png new file mode 100644 index 0000000..45604e9 Binary files /dev/null and b/images/target/432captcha.png differ diff --git a/images/target/433captcha.png b/images/target/433captcha.png new file mode 100644 index 0000000..59b770d Binary files /dev/null and b/images/target/433captcha.png differ diff --git a/images/target/434captcha.png b/images/target/434captcha.png new file mode 100644 index 0000000..538b2af Binary files /dev/null and b/images/target/434captcha.png differ diff --git a/images/target/435captcha.png b/images/target/435captcha.png new file mode 100644 index 0000000..837f396 Binary files /dev/null and b/images/target/435captcha.png differ diff --git a/images/target/436captcha.png b/images/target/436captcha.png new file mode 100644 index 0000000..3d11183 Binary files /dev/null and b/images/target/436captcha.png differ diff --git a/images/target/437captcha.png b/images/target/437captcha.png new file mode 100644 index 0000000..ed50d67 Binary files /dev/null and b/images/target/437captcha.png differ diff --git a/images/target/438captcha.png b/images/target/438captcha.png new file mode 100644 index 0000000..9226ae6 Binary files /dev/null and b/images/target/438captcha.png differ diff --git a/images/target/439captcha.png b/images/target/439captcha.png new file mode 100644 index 0000000..e7701ba Binary files /dev/null and b/images/target/439captcha.png differ diff --git a/images/target/43captcha.png b/images/target/43captcha.png new file mode 100644 index 0000000..6debfad Binary files /dev/null and b/images/target/43captcha.png differ diff --git a/images/target/440captcha.png b/images/target/440captcha.png new file mode 100644 index 0000000..3a21d39 Binary files /dev/null and b/images/target/440captcha.png differ diff --git a/images/target/441captcha.png b/images/target/441captcha.png new file mode 100644 index 0000000..9961aee Binary files /dev/null and b/images/target/441captcha.png differ diff --git a/images/target/442captcha.png b/images/target/442captcha.png new file mode 100644 index 0000000..3d50c1d Binary files /dev/null and b/images/target/442captcha.png differ diff --git a/images/target/443captcha.png b/images/target/443captcha.png new file mode 100644 index 0000000..d8888a7 Binary files /dev/null and b/images/target/443captcha.png differ diff --git a/images/target/444captcha.png b/images/target/444captcha.png new file mode 100644 index 0000000..5effc48 Binary files /dev/null and b/images/target/444captcha.png differ diff --git a/images/target/445captcha.png b/images/target/445captcha.png new file mode 100644 index 0000000..6c7771d Binary files /dev/null and b/images/target/445captcha.png differ diff --git a/images/target/446captcha.png b/images/target/446captcha.png new file mode 100644 index 0000000..eed5e6f Binary files /dev/null and b/images/target/446captcha.png differ diff --git a/images/target/447captcha.png b/images/target/447captcha.png new file mode 100644 index 0000000..b7f01f2 Binary files /dev/null and b/images/target/447captcha.png differ diff --git a/images/target/448captcha.png b/images/target/448captcha.png new file mode 100644 index 0000000..f6ebe64 Binary files /dev/null and b/images/target/448captcha.png differ diff --git a/images/target/449captcha.png b/images/target/449captcha.png new file mode 100644 index 0000000..f40ad0d Binary files /dev/null and b/images/target/449captcha.png differ diff --git a/images/target/44captcha.png b/images/target/44captcha.png new file mode 100644 index 0000000..6c4b855 Binary files /dev/null and b/images/target/44captcha.png differ diff --git a/images/target/450captcha.png b/images/target/450captcha.png new file mode 100644 index 0000000..3246f72 Binary files /dev/null and b/images/target/450captcha.png differ diff --git a/images/target/451captcha.png b/images/target/451captcha.png new file mode 100644 index 0000000..b8c34c2 Binary files /dev/null and b/images/target/451captcha.png differ diff --git a/images/target/452captcha.png b/images/target/452captcha.png new file mode 100644 index 0000000..dd58875 Binary files /dev/null and b/images/target/452captcha.png differ diff --git a/images/target/453captcha.png b/images/target/453captcha.png new file mode 100644 index 0000000..94481b6 Binary files /dev/null and b/images/target/453captcha.png differ diff --git a/images/target/454captcha.png b/images/target/454captcha.png new file mode 100644 index 0000000..86f0d06 Binary files /dev/null and b/images/target/454captcha.png differ diff --git a/images/target/455captcha.png b/images/target/455captcha.png new file mode 100644 index 0000000..42a731e Binary files /dev/null and b/images/target/455captcha.png differ diff --git a/images/target/456captcha.png b/images/target/456captcha.png new file mode 100644 index 0000000..bed7570 Binary files /dev/null and b/images/target/456captcha.png differ diff --git a/images/target/457captcha.png b/images/target/457captcha.png new file mode 100644 index 0000000..349ff9e Binary files /dev/null and b/images/target/457captcha.png differ diff --git a/images/target/458captcha.png b/images/target/458captcha.png new file mode 100644 index 0000000..b6d399d Binary files /dev/null and b/images/target/458captcha.png differ diff --git a/images/target/459captcha.png b/images/target/459captcha.png new file mode 100644 index 0000000..e7d1292 Binary files /dev/null and b/images/target/459captcha.png differ diff --git a/images/target/45captcha.png b/images/target/45captcha.png new file mode 100644 index 0000000..0b96718 Binary files /dev/null and b/images/target/45captcha.png differ diff --git a/images/target/460captcha.png b/images/target/460captcha.png new file mode 100644 index 0000000..844cd0d Binary files /dev/null and b/images/target/460captcha.png differ diff --git a/images/target/461captcha.png b/images/target/461captcha.png new file mode 100644 index 0000000..b37f97b Binary files /dev/null and b/images/target/461captcha.png differ diff --git a/images/target/462captcha.png b/images/target/462captcha.png new file mode 100644 index 0000000..c47ec8b Binary files /dev/null and b/images/target/462captcha.png differ diff --git a/images/target/463captcha.png b/images/target/463captcha.png new file mode 100644 index 0000000..06365d9 Binary files /dev/null and b/images/target/463captcha.png differ diff --git a/images/target/464captcha.png b/images/target/464captcha.png new file mode 100644 index 0000000..32ecddd Binary files /dev/null and b/images/target/464captcha.png differ diff --git a/images/target/465captcha.png b/images/target/465captcha.png new file mode 100644 index 0000000..d441c2d Binary files /dev/null and b/images/target/465captcha.png differ diff --git a/images/target/466captcha.png b/images/target/466captcha.png new file mode 100644 index 0000000..61332f0 Binary files /dev/null and b/images/target/466captcha.png differ diff --git a/images/target/467captcha.png b/images/target/467captcha.png new file mode 100644 index 0000000..5a63cd2 Binary files /dev/null and b/images/target/467captcha.png differ diff --git a/images/target/468captcha.png b/images/target/468captcha.png new file mode 100644 index 0000000..62c333f Binary files /dev/null and b/images/target/468captcha.png differ diff --git a/images/target/469captcha.png b/images/target/469captcha.png new file mode 100644 index 0000000..471396c Binary files /dev/null and b/images/target/469captcha.png differ diff --git a/images/target/46captcha.png b/images/target/46captcha.png new file mode 100644 index 0000000..316f5f6 Binary files /dev/null and b/images/target/46captcha.png differ diff --git a/images/target/470captcha.png b/images/target/470captcha.png new file mode 100644 index 0000000..69d2bfa Binary files /dev/null and b/images/target/470captcha.png differ diff --git a/images/target/471captcha.png b/images/target/471captcha.png new file mode 100644 index 0000000..b3b8fe0 Binary files /dev/null and b/images/target/471captcha.png differ diff --git a/images/target/472captcha.png b/images/target/472captcha.png new file mode 100644 index 0000000..91da601 Binary files /dev/null and b/images/target/472captcha.png differ diff --git a/images/target/473captcha.png b/images/target/473captcha.png new file mode 100644 index 0000000..912918a Binary files /dev/null and b/images/target/473captcha.png differ diff --git a/images/target/474captcha.png b/images/target/474captcha.png new file mode 100644 index 0000000..c5dcb64 Binary files /dev/null and b/images/target/474captcha.png differ diff --git a/images/target/475captcha.png b/images/target/475captcha.png new file mode 100644 index 0000000..ccf592f Binary files /dev/null and b/images/target/475captcha.png differ diff --git a/images/target/476captcha.png b/images/target/476captcha.png new file mode 100644 index 0000000..524726e Binary files /dev/null and b/images/target/476captcha.png differ diff --git a/images/target/477captcha.png b/images/target/477captcha.png new file mode 100644 index 0000000..bbdde5b Binary files /dev/null and b/images/target/477captcha.png differ diff --git a/images/target/478captcha.png b/images/target/478captcha.png new file mode 100644 index 0000000..a56505e Binary files /dev/null and b/images/target/478captcha.png differ diff --git a/images/target/479captcha.png b/images/target/479captcha.png new file mode 100644 index 0000000..42084ab Binary files /dev/null and b/images/target/479captcha.png differ diff --git a/images/target/47captcha.png b/images/target/47captcha.png new file mode 100644 index 0000000..607bf5a Binary files /dev/null and b/images/target/47captcha.png differ diff --git a/images/target/480captcha.png b/images/target/480captcha.png new file mode 100644 index 0000000..36fa041 Binary files /dev/null and b/images/target/480captcha.png differ diff --git a/images/target/481captcha.png b/images/target/481captcha.png new file mode 100644 index 0000000..63532c0 Binary files /dev/null and b/images/target/481captcha.png differ diff --git a/images/target/482captcha.png b/images/target/482captcha.png new file mode 100644 index 0000000..038a590 Binary files /dev/null and b/images/target/482captcha.png differ diff --git a/images/target/483captcha.png b/images/target/483captcha.png new file mode 100644 index 0000000..f41750f Binary files /dev/null and b/images/target/483captcha.png differ diff --git a/images/target/484captcha.png b/images/target/484captcha.png new file mode 100644 index 0000000..f5ac523 Binary files /dev/null and b/images/target/484captcha.png differ diff --git a/images/target/485captcha.png b/images/target/485captcha.png new file mode 100644 index 0000000..525866e Binary files /dev/null and b/images/target/485captcha.png differ diff --git a/images/target/486captcha.png b/images/target/486captcha.png new file mode 100644 index 0000000..fdfda7b Binary files /dev/null and b/images/target/486captcha.png differ diff --git a/images/target/487captcha.png b/images/target/487captcha.png new file mode 100644 index 0000000..c47ec8b Binary files /dev/null and b/images/target/487captcha.png differ diff --git a/images/target/488captcha.png b/images/target/488captcha.png new file mode 100644 index 0000000..26ccfbd Binary files /dev/null and b/images/target/488captcha.png differ diff --git a/images/target/489captcha.png b/images/target/489captcha.png new file mode 100644 index 0000000..008d541 Binary files /dev/null and b/images/target/489captcha.png differ diff --git a/images/target/48captcha.png b/images/target/48captcha.png new file mode 100644 index 0000000..5b7fbfc Binary files /dev/null and b/images/target/48captcha.png differ diff --git a/images/target/490captcha.png b/images/target/490captcha.png new file mode 100644 index 0000000..7dad7f0 Binary files /dev/null and b/images/target/490captcha.png differ diff --git a/images/target/491captcha.png b/images/target/491captcha.png new file mode 100644 index 0000000..fb27648 Binary files /dev/null and b/images/target/491captcha.png differ diff --git a/images/target/492captcha.png b/images/target/492captcha.png new file mode 100644 index 0000000..bde7c6e Binary files /dev/null and b/images/target/492captcha.png differ diff --git a/images/target/493captcha.png b/images/target/493captcha.png new file mode 100644 index 0000000..053daea Binary files /dev/null and b/images/target/493captcha.png differ diff --git a/images/target/494captcha.png b/images/target/494captcha.png new file mode 100644 index 0000000..7830246 Binary files /dev/null and b/images/target/494captcha.png differ diff --git a/images/target/495captcha.png b/images/target/495captcha.png new file mode 100644 index 0000000..752db9f Binary files /dev/null and b/images/target/495captcha.png differ diff --git a/images/target/496captcha.png b/images/target/496captcha.png new file mode 100644 index 0000000..cb26a8c Binary files /dev/null and b/images/target/496captcha.png differ diff --git a/images/target/497captcha.png b/images/target/497captcha.png new file mode 100644 index 0000000..136216d Binary files /dev/null and b/images/target/497captcha.png differ diff --git a/images/target/498captcha.png b/images/target/498captcha.png new file mode 100644 index 0000000..858c043 Binary files /dev/null and b/images/target/498captcha.png differ diff --git a/images/target/499captcha.png b/images/target/499captcha.png new file mode 100644 index 0000000..8052fdf Binary files /dev/null and b/images/target/499captcha.png differ diff --git a/images/target/49captcha.png b/images/target/49captcha.png new file mode 100644 index 0000000..8798826 Binary files /dev/null and b/images/target/49captcha.png differ diff --git a/images/target/4captcha.png b/images/target/4captcha.png new file mode 100644 index 0000000..02159fe Binary files /dev/null and b/images/target/4captcha.png differ diff --git a/images/target/500captcha.png b/images/target/500captcha.png new file mode 100644 index 0000000..45d9473 Binary files /dev/null and b/images/target/500captcha.png differ diff --git a/images/target/50captcha.png b/images/target/50captcha.png new file mode 100644 index 0000000..4ca8e48 Binary files /dev/null and b/images/target/50captcha.png differ diff --git a/images/target/51captcha.png b/images/target/51captcha.png new file mode 100644 index 0000000..881c06d Binary files /dev/null and b/images/target/51captcha.png differ diff --git a/images/target/52captcha.png b/images/target/52captcha.png new file mode 100644 index 0000000..011bc5b Binary files /dev/null and b/images/target/52captcha.png differ diff --git a/images/target/53captcha.png b/images/target/53captcha.png new file mode 100644 index 0000000..faa2739 Binary files /dev/null and b/images/target/53captcha.png differ diff --git a/images/target/54captcha.png b/images/target/54captcha.png new file mode 100644 index 0000000..01d36a7 Binary files /dev/null and b/images/target/54captcha.png differ diff --git a/images/target/55captcha.png b/images/target/55captcha.png new file mode 100644 index 0000000..12f1ff7 Binary files /dev/null and b/images/target/55captcha.png differ diff --git a/images/target/56captcha.png b/images/target/56captcha.png new file mode 100644 index 0000000..56dc659 Binary files /dev/null and b/images/target/56captcha.png differ diff --git a/images/target/57captcha.png b/images/target/57captcha.png new file mode 100644 index 0000000..4a735aa Binary files /dev/null and b/images/target/57captcha.png differ diff --git a/images/target/58captcha.png b/images/target/58captcha.png new file mode 100644 index 0000000..c08f604 Binary files /dev/null and b/images/target/58captcha.png differ diff --git a/images/target/59captcha.png b/images/target/59captcha.png new file mode 100644 index 0000000..9c1218f Binary files /dev/null and b/images/target/59captcha.png differ diff --git a/images/target/5captcha.png b/images/target/5captcha.png new file mode 100644 index 0000000..2bc9c5b Binary files /dev/null and b/images/target/5captcha.png differ diff --git a/images/target/60captcha.png b/images/target/60captcha.png new file mode 100644 index 0000000..8ce480b Binary files /dev/null and b/images/target/60captcha.png differ diff --git a/images/target/61captcha.png b/images/target/61captcha.png new file mode 100644 index 0000000..0371913 Binary files /dev/null and b/images/target/61captcha.png differ diff --git a/images/target/62captcha.png b/images/target/62captcha.png new file mode 100644 index 0000000..18f708b Binary files /dev/null and b/images/target/62captcha.png differ diff --git a/images/target/63captcha.png b/images/target/63captcha.png new file mode 100644 index 0000000..ac23060 Binary files /dev/null and b/images/target/63captcha.png differ diff --git a/images/target/64captcha.png b/images/target/64captcha.png new file mode 100644 index 0000000..31be28e Binary files /dev/null and b/images/target/64captcha.png differ diff --git a/images/target/65captcha.png b/images/target/65captcha.png new file mode 100644 index 0000000..068b43a Binary files /dev/null and b/images/target/65captcha.png differ diff --git a/images/target/66captcha.png b/images/target/66captcha.png new file mode 100644 index 0000000..6deca5e Binary files /dev/null and b/images/target/66captcha.png differ diff --git a/images/target/6captcha.png b/images/target/6captcha.png new file mode 100644 index 0000000..f7485c0 Binary files /dev/null and b/images/target/6captcha.png differ diff --git a/images/target/7captcha.png b/images/target/7captcha.png new file mode 100644 index 0000000..d87e0a2 Binary files /dev/null and b/images/target/7captcha.png differ diff --git a/images/target/8captcha.png b/images/target/8captcha.png new file mode 100644 index 0000000..2b3cf38 Binary files /dev/null and b/images/target/8captcha.png differ diff --git a/images/target/9captcha.png b/images/target/9captcha.png new file mode 100644 index 0000000..447ab8c Binary files /dev/null and b/images/target/9captcha.png differ diff --git a/images/visual/滑块-1.png b/images/visual/滑块-1.png new file mode 100644 index 0000000..6a52b62 Binary files /dev/null and b/images/visual/滑块-1.png differ diff --git a/images/visual/滑块-2.png b/images/visual/滑块-2.png new file mode 100644 index 0000000..69a6c7e Binary files /dev/null and b/images/visual/滑块-2.png differ diff --git a/images/visual/滑块-3.png b/images/visual/滑块-3.png new file mode 100644 index 0000000..364ddff Binary files /dev/null and b/images/visual/滑块-3.png differ diff --git a/images/visual/滑块-4.png b/images/visual/滑块-4.png new file mode 100644 index 0000000..0dc7f59 Binary files /dev/null and b/images/visual/滑块-4.png differ diff --git a/images/visual/滑块-5.png b/images/visual/滑块-5.png new file mode 100644 index 0000000..9e5ddfd Binary files /dev/null and b/images/visual/滑块-5.png differ diff --git a/images/visual/滑块-6.png b/images/visual/滑块-6.png new file mode 100644 index 0000000..901b04a Binary files /dev/null and b/images/visual/滑块-6.png differ diff --git a/images/visual/滑块-7.png b/images/visual/滑块-7.png new file mode 100644 index 0000000..67138ba Binary files /dev/null and b/images/visual/滑块-7.png differ diff --git a/images/visual/滑块-8.png b/images/visual/滑块-8.png new file mode 100644 index 0000000..63a02c3 Binary files /dev/null and b/images/visual/滑块-8.png differ diff --git a/images/visual/滑块.png b/images/visual/滑块.png new file mode 100644 index 0000000..4ace53a Binary files /dev/null and b/images/visual/滑块.png differ diff --git a/package.json b/package.json new file mode 100644 index 0000000..fd0d2c4 --- /dev/null +++ b/package.json @@ -0,0 +1,21 @@ +{ + "name": "captcha_cracker", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "build": "tsc", + "detect": "ts-node --transpile-only src/detector.ts" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "sharp": "^0.34.4" + }, + "devDependencies": { + "@types/node": "^24.9.1", + "ts-node": "^10.9.2", + "typescript": "^5.9.3" + } +} diff --git a/spec.md b/spec.md new file mode 100644 index 0000000..d841ca8 --- /dev/null +++ b/spec.md @@ -0,0 +1,140 @@ + +## 需求 +基于sharp图像处理库,实现图片中的滑块精确识别。 + +## 滑块形状图片和标注图片 +滑块抠图放在images/slider目录中,这些图片是滑块的形状。注意:方形滑块的某些边可能有半圆形凹陷或凸起。 +标注滑块的图片放在images/target中,图中标有target文字的框框是目标。注意:要识别的不是target框,而是target所框的滑块。 + +## 程序识别标准 +images/douban-target下放置手工红框标注的图片,**目标在红色框框内**,作为算法生成结果的比对。 +如果同一张图片程序生成框和人工框匹配,通过测试,否则继续优化。 +注意:不是下方绿色的滑块,是红框内的滑块。 + +## 豆瓣滑块截图 +images/douban目录放置豆瓣网滑块真实截图。 +完成代码后,使用该目录的图片中做验证,并用蓝色方框框出滑块准确位置,输出到images/output目录。 + +## 思路 +先识别douban-target中的红框手工标注的位置,作为基准,检验output中的标注与基准比较,来确定是否正确识别 +douban-target中的图片是手工标注,所以允许output标注结果与基准有少量偏差,比如10px + +改进思路: +1. 不能根据亮度,滑块的亮度不确定。 +2. 图片中滑块只有2个 +3. 滑块形状是正方形,其中2个边,有半圆凹陷或凸起。 +最佳方法是边缘检测。滑块边框色调基本一致。 + + +## 网友思路,滑块预处理 + +先提取一下滑块的轮廓,抖音的滑块特征很明显,可以不用cv2.Canny来提取边缘特征。 + +具体步骤如下: + - 去除外围透明像素点(滑块外层的像素点的a值都是0) + - 将图片转成灰度图并进行二值化操作(0和255) + - 只保留二值化为255的像素点 + - 去除多余噪声 + +### 读取rgba格式的滑块 +import cv2 +input_img = cv2.imread("slide.png", cv2.IMREAD_UNCHANGED) +将透明值为0的像素点设置为纯黑色 + +### 取透明维度的值 +alpha_channel = input_img[:, :, 3] + +### 只使用rgb三个维度的值 +rgb_image = input_img[:, :, :3] +rgb_image[alpha_channel == 0] = [0, 0, 0] +提取白色边缘并设置成黑色,将其他像素点设置为白色 + +gray = cv2.cvtColor(rgb_image, cv2.COLOR_BGR2GRAY) +_, thresholded = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY) +white_img = np.ones_like(rgb_image) * 255 +white_img[thresholded == 255] = [0, 0, 0] + +### 去除噪声(判断某个黑色像素点周围3x3范围内有多少个黑色像素点,少于阈值认为是噪声) + +def count_black_neighbors_by_cv2(gray_image): + if gray_image.ndim == 3: + gray_image = cv2.cvtColor(gray_image, cv2.COLOR_BGR2GRAY) + _, binary_image = cv2.threshold(gray_image, 240, 255, cv2.THRESH_BINARY_INV) + binary_image = binary_image // 255 + kernel = np.ones((3, 3), dtype=np.uint8) + kernel[1, 1] = 0 + black_neighbors = cv2.filter2D(binary_image, -1, kernel) + # 设置边缘为0 + black_neighbors[:, 0] = 0 + black_neighbors[:, 109] = 0 + return black_neighbors + +当然也可以通过遍历来实现,这样更容易理解点 + +def count_black_neighbors_by_range(gray_image): + # 将图像转换为灰度图 + if len(gray_image.shape) == 3: + gray_image = cv2.cvtColor(gray_image, cv2.COLOR_BGR2GRAY) + # 二值化图像 + _, binary_image = cv2.threshold(gray_image, 240, 255, cv2.THRESH_BINARY_INV) + binary_image = binary_image // 255 + # 创建一个与输入图像大小相同的全零数组 + black_neighbors = np.zeros_like(binary_image) + + # 遍历图像中的3x3邻域,计算每个像素 + neighbor_offsets = [(-1, -1), (-1, 0), (-1, 1), + (0, -1), (0, 1), + (1, -1), (1, 0), (1, 1)] + + # 遍历每个像素 + rows, cols = binary_image.shape + for row in range(1, rows - 1): + for col in range(1, cols - 1): + # 当它本身不是黑色像素点的时候,就不计算 + if binary_image[row, col] != 1: + continue + count = 0 + for offset in neighbor_offsets: + neighbor_row, neighbor_col = row + offset[0], col + offset[1] + if binary_image[neighbor_row, neighbor_col] == 1: + count += 1 + black_neighbors[row, col] = count + + return black_neighbors + +black_neighbors = count_black_neighbors_by_range(white_img) +output = np.ones_like(rgb_image) * 255 +output[black_neighbors > 4] = 0 + + +好了,现在可以把上面看到的内容忘掉了,因为在实际识别的时候用不到(我发现不做处理比做处理识别的准确率要高很多),直接识别准确率甚至接近百分百了。 + +### 下面是识别的完整代码 + +import os +import cv2 + +def get_slide_distance(bg_path, slide_path): + ''' + 识别滑块具体位置,返回位置比例: 位置/图片宽度 + 使用的时候再乘以实际图片宽度即可 + ''' + bg_img = cv2.imread(bg_path) + sd_img = cv2.imread(slide_path) + bg_gray = cv2.cvtColor(bg_img, cv2.COLOR_BGR2GRAY) + bg_gray = cv2.GaussianBlur(bg_gray, (5, 5), 0) + bg_edge = cv2.Canny(bg_gray, 30, 100) + rgb_bg_gray = cv2.cvtColor(bg_edge, cv2.COLOR_GRAY2RGB) + + sd_gray = cv2.cvtColor(sd_img, cv2.COLOR_BGR2GRAY) + sd_gray = cv2.GaussianBlur(sd_gray, (5, 5), 0) + sd_edge = cv2.Canny(sd_gray, 30, 100) + rgb_sd_gray = cv2.cvtColor(sd_edge, cv2.COLOR_GRAY2RGB) + result = cv2.matchTemplate(rgb_bg_gray, rgb_sd_gray, cv2.TM_CCORR_NORMED) + _, _, _, max_loc = cv2.minMaxLoc(result) + cv2.rectangle(bg_img, (max_loc[0], max_loc[1]), (max_loc[0]+110, max_loc[1] + 110), + (0, 255, 0), 2) + result_path = os.path.join(os.path.dirname(bg_path), "result.png") + cv2.imwrite(result_path, bg_img) + return max_loc[0]/bg_gray.shape[1] + diff --git a/src/analyze-6.ts b/src/analyze-6.ts new file mode 100644 index 0000000..e0b2579 --- /dev/null +++ b/src/analyze-6.ts @@ -0,0 +1,75 @@ +import sharp from 'sharp'; +import * as path from 'path'; + +async function analyze() { + const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块-6.png'); + const { data, info } = await sharp(imagePath).raw().toBuffer({ resolveWithObject: true }); + const { width, height, channels } = info; + + console.log(`图片尺寸: ${width}x${height}`); + + const darkThreshold = 85; + const darkMap = new Uint8Array(width * height); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx], g = data[idx + 1], b = data[idx + 2]; + const brightness = r * 0.299 + g * 0.587 + b * 0.114; + darkMap[y * width + x] = brightness < darkThreshold ? 1 : 0; + } + } + + // 找连通区域 + const visited = new Uint8Array(width * height); + const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0 && darkMap[idx] === 1) { + let minX = x, minY = y, maxX = x, maxY = y, pixelCount = 0; + const stack: Array<[number, number]> = [[x, y]]; + + while (stack.length > 0) { + const [cx, cy] = stack.pop()!; + if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; + const cidx = cy * width + cx; + if (visited[cidx] === 1 || darkMap[cidx] !== 1) continue; + + visited[cidx] = 1; + pixelCount++; + minX = Math.min(minX, cx); + minY = Math.min(minY, cy); + maxX = Math.max(maxX, cx); + maxY = Math.max(maxY, cy); + + stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); + } + + const w = maxX - minX + 1; + const h = maxY - minY + 1; + if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) { + regions.push({x: minX, y: minY, w, h, pixels: pixelCount}); + } + } + } + } + + console.log(`\n找到 ${regions.length} 个区域`); + regions.sort((a, b) => b.pixels - a.pixels); + + console.log('\n所有区域(按面积排序):'); + regions.forEach((r, i) => { + const aspectRatio = r.w / r.h; + const density = r.pixels / (r.w * r.h); + const match = + r.w >= 50 && r.w <= 95 && + r.h >= 50 && r.h <= 95 && + aspectRatio >= 0.85 && aspectRatio <= 1.18 && + density > 0.65; + console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)} ${match ? '✓' : ''}`); + }); +} + +analyze().catch(console.error); diff --git a/src/analyze-pixel.ts b/src/analyze-pixel.ts new file mode 100644 index 0000000..ff36beb --- /dev/null +++ b/src/analyze-pixel.ts @@ -0,0 +1,91 @@ +import sharp from 'sharp'; +import * as path from 'path'; + +/** + * 分析特定区域的像素亮度 + */ +async function analyzeRegion(imagePath: string, regions: Array<{name: string, x: number, y: number, width: number, height: number}>) { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width: imgWidth, height: imgHeight, channels } = info; + + console.log(`\n分析图片: ${path.basename(imagePath)}`); + console.log(`图片尺寸: ${imgWidth}×${imgHeight}\n`); + + for (const region of regions) { + console.log(`区域: ${region.name}`); + console.log(` 位置: (${region.x}, ${region.y}), 大小: ${region.width}×${region.height}`); + + let totalBrightness = 0; + let darkCount = 0; // < 90 + let count = 0; + let minBright = 255; + let maxBright = 0; + + for (let y = region.y; y < region.y + region.height && y < imgHeight; y++) { + for (let x = region.x; x < region.x + region.width && x < imgWidth; x++) { + const idx = (y * imgWidth + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = r * 0.299 + g * 0.587 + b * 0.114; + + totalBrightness += brightness; + count++; + + if (brightness < 90) darkCount++; + minBright = Math.min(minBright, brightness); + maxBright = Math.max(maxBright, brightness); + } + } + + const avgBrightness = count > 0 ? totalBrightness / count : 0; + const darkRatio = count > 0 ? (darkCount / count * 100) : 0; + + console.log(` 平均亮度: ${avgBrightness.toFixed(1)}`); + console.log(` 亮度范围: ${minBright.toFixed(0)} - ${maxBright.toFixed(0)}`); + console.log(` 暗像素比例(<90): ${darkRatio.toFixed(1)}%`); + console.log(); + } +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + + // 分析几个关键图片的特定区域 + + // 滑块.png - 完全未检测到 + await analyzeRegion( + path.join(baseDir, 'images', 'douban', '滑块.png'), + [ + { name: '左侧滑块', x: 131, y: 408, width: 87, height: 88 }, + { name: '右侧滑块', x: 375, y: 407, width: 88, height: 89 }, + { name: '背景区域', x: 300, y: 200, width: 50, height: 50 } + ] + ); + + // 滑块-2.png - 只检测到1个,漏检2个 + await analyzeRegion( + path.join(baseDir, 'images', 'douban', '滑块-2.png'), + [ + { name: '左侧滑块', x: 125, y: 245, width: 89, height: 91 }, + { name: '右侧滑块', x: 454, y: 244, width: 90, height: 92 }, + { name: '误检区域', x: 660, y: 164, width: 78, height: 51 } + ] + ); + + // 滑块-6.png - 检测到2个但都是误检 + await analyzeRegion( + path.join(baseDir, 'images', 'douban', '滑块-6.png'), + [ + { name: '左侧目标', x: 116, y: 319, width: 91, height: 91 }, + { name: '右侧目标', x: 574, y: 318, width: 92, height: 92 }, + { name: '误检1', x: 149, y: 456, width: 95, height: 107 }, + { name: '误检2', x: 68, y: 437, width: 74, height: 126 } + ] + ); +} + +main().catch(console.error); diff --git a/src/analyze.ts b/src/analyze.ts new file mode 100644 index 0000000..6cd1910 --- /dev/null +++ b/src/analyze.ts @@ -0,0 +1,61 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +async function analyzeRedBox(imagePath: string) { + const basename = path.basename(imagePath); + console.log(`\n分析: ${basename}`); + + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 查找红色像素 + let minX = width; + let minY = height; + let maxX = 0; + let maxY = 0; + let foundRed = false; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + if (r > 200 && g < 100 && b < 100) { + foundRed = true; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + } + } + + if (foundRed) { + const boxWidth = maxX - minX + 1; + const boxHeight = maxY - minY + 1; + console.log(` 红框位置: [x=${minX}, y=${minY}, w=${boxWidth}, h=${boxHeight}]`); + console.log(` 宽高比: ${(boxWidth / boxHeight).toFixed(2)}`); + console.log(` 相对位置: y=${(minY / height * 100).toFixed(1)}% (高度)`); + } else { + console.log(` 未找到红框`); + } +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const targetDir = path.join(baseDir, 'images', 'douban-target'); + + const files = fs.readdirSync(targetDir).filter(f => f.endsWith('.png')).slice(0, 9); + + for (const file of files) { + await analyzeRedBox(path.join(targetDir, file)); + } +} + +main().catch(console.error); diff --git a/src/debug-failed.ts b/src/debug-failed.ts new file mode 100644 index 0000000..c1ad8df --- /dev/null +++ b/src/debug-failed.ts @@ -0,0 +1,128 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +async function debugSingle(imagePath: string) { + const basename = path.basename(imagePath); + console.log(`\n=== ${basename} ===`); + + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + const darkThreshold = 85; + const darkMap = new Uint8Array(width * height); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = (r * 0.299 + g * 0.587 + b * 0.114); + + darkMap[y * width + x] = brightness < darkThreshold ? 1 : 0; + } + } + + const visited = new Uint8Array(width * height); + const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0 && darkMap[idx] === 1) { + let minX = x, minY = y, maxX = x, maxY = y, pixelCount = 0; + const stack: Array<[number, number]> = [[x, y]]; + + while (stack.length > 0) { + const [cx, cy] = stack.pop()!; + if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; + const cidx = cy * width + cx; + if (visited[cidx] === 1 || darkMap[cidx] !== 1) continue; + + visited[cidx] = 1; + pixelCount++; + minX = Math.min(minX, cx); + minY = Math.min(minY, cy); + maxX = Math.max(maxX, cx); + maxY = Math.max(maxY, cy); + + stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); + } + + const w = maxX - minX + 1; + const h = maxY - minY + 1; + if (w >= 20 && h >= 20 && w < width * 0.9 && h < height * 0.9) { + regions.push({x: minX, y: minY, w, h, pixels: pixelCount}); + } + } + } + } + + console.log(`找到 ${regions.length} 个有效连通区域`); + + // 过滤符合条件的候选 + const candidates = regions.filter(r => { + const aspectRatio = r.w / r.h; + const density = r.pixels / (r.w * r.h); + const centerY = r.y + r.h / 2; + + return ( + r.w >= 50 && r.w <= 95 && + r.h >= 50 && r.h <= 95 && + aspectRatio >= 0.85 && aspectRatio <= 1.18 && + centerY > height * 0.12 && + centerY < height * 0.78 && + density > 0.65 + ); + }); + + console.log(`符合严格条件的候选: ${candidates.length} 个`); + + if (candidates.length > 0) { + candidates.forEach((r, i) => { + const aspectRatio = r.w / r.h; + const density = r.pixels / (r.w * r.h); + console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`); + }); + } else { + // 尝试放宽条件 + const relaxed = regions.filter(r => { + const aspectRatio = r.w / r.h; + const density = r.pixels / (r.w * r.h); + + return ( + r.w >= 45 && r.w <= 100 && + r.h >= 45 && r.h <= 100 && + aspectRatio >= 0.75 && aspectRatio <= 1.33 && + r.y < height * 0.82 && + r.y > height * 0.06 && + density > 0.55 + ); + }); + + console.log(`符合放宽条件的候选: ${relaxed.length} 个`); + relaxed.slice(0, 5).forEach((r, i) => { + const aspectRatio = r.w / r.h; + const density = r.pixels / (r.w * r.h); + console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`); + }); + } +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + + // 检查未检测到的图片 + const failedFiles = ['滑块-2.png', '滑块-3.png', '滑块-6.png', '滑块-7.png', '滑块.png']; + + for (const file of failedFiles) { + await debugSingle(path.join(doubanDir, file)); + } +} + +main().catch(console.error); diff --git a/src/debug-regions.ts b/src/debug-regions.ts new file mode 100644 index 0000000..6438a89 --- /dev/null +++ b/src/debug-regions.ts @@ -0,0 +1,98 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +async function debugRegions(imagePath: string) { + const basename = path.basename(imagePath); + console.log(`\n=== ${basename} ===`); + + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 检测暗色像素 + const darkThreshold = 85; + const darkMap = new Uint8Array(width * height); + let darkPixelCount = 0; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = (r * 0.299 + g * 0.587 + b * 0.114); + + if (brightness < darkThreshold) { + darkMap[y * width + x] = 1; + darkPixelCount++; + } + } + } + + console.log(`暗色像素占比: ${(darkPixelCount / (width * height) * 100).toFixed(2)}%`); + + // 找连通区域(简化版) + const visited = new Uint8Array(width * height); + const regions: Array<{x: number; y: number; w: number; h: number; pixels: number}> = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0 && darkMap[idx] === 1) { + let minX = x, minY = y, maxX = x, maxY = y, pixelCount = 0; + const stack: Array<[number, number]> = [[x, y]]; + + while (stack.length > 0) { + const [cx, cy] = stack.pop()!; + if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; + const cidx = cy * width + cx; + if (visited[cidx] === 1 || darkMap[cidx] !== 1) continue; + + visited[cidx] = 1; + pixelCount++; + minX = Math.min(minX, cx); + minY = Math.min(minY, cy); + maxX = Math.max(maxX, cx); + maxY = Math.max(maxY, cy); + + stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); + } + + const w = maxX - minX + 1; + const h = maxY - minY + 1; + if (w >= 20 && h >= 20) { + regions.push({x: minX, y: minY, w, h, pixels: pixelCount}); + } + } + } + } + + console.log(`找到 ${regions.length} 个连通区域(>= 20x20)`); + + // 按面积排序并显示前10个 + regions.sort((a, b) => (b.w * b.h) - (a.w * a.h)); + + console.log('\n前10个最大区域:'); + for (let i = 0; i < Math.min(10, regions.length); i++) { + const r = regions[i]; + const aspectRatio = r.w / r.h; + const density = r.pixels / (r.w * r.h); + console.log(` ${i + 1}. [x=${r.x}, y=${r.y}, w=${r.w}, h=${r.h}] 宽高比=${aspectRatio.toFixed(2)}, 密度=${density.toFixed(2)}`); + } +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + + const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')).slice(0, 3); + + for (const file of files) { + await debugRegions(path.join(doubanDir, file)); + } +} + +main().catch(console.error); diff --git a/src/debug-results.ts b/src/debug-results.ts new file mode 100644 index 0000000..c7b46f7 --- /dev/null +++ b/src/debug-results.ts @@ -0,0 +1,118 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { SliderValidator, BoundingBox as ValidatorBox } from './validator'; +import { SliderDetector } from './detector'; + +async function main() { + const detector = new SliderDetector(); + const validator = new SliderValidator(); + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); + + console.log('=== 详细调试检测结果 ===\n'); + + const files = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); + + for (const file of files) { + const imagePath = path.join(doubanDir, file); + const targetPath = path.join(doubanTargetDir, file); + + if (!fs.existsSync(imagePath)) continue; + + console.log(`\n【${file}】`); + console.log('─'.repeat(60)); + + // 获取标准答案 + const targetBoxes = await validator.extractRedBoxes(targetPath); + console.log(`标准答案(${targetBoxes.length}个):`); + targetBoxes.forEach((box, i) => { + console.log(` 目标${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); + }); + + // 获取检测结果 + const detected = await detector.detectSlider(imagePath, undefined, true); + const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : []; + + console.log(`\n检测结果(${detectedBoxes.length}个):`); + detectedBoxes.forEach((box, i) => { + console.log(` 检测${i + 1}: x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); + }); + + // 详细匹配分析 + console.log(`\n匹配分析(容差10px):`); + + const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({ + x: b.x, + y: b.y, + width: b.width, + height: b.height + })); + + const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10); + + // 显示每个匹配对 + if (result.matches.length > 0) { + console.log(` 成功匹配 ${result.matches.length} 对:`); + result.matches.forEach((match, i) => { + const det = match.detected; + const tgt = match.target; + const detCenter = [det.x + det.width/2, det.y + det.height/2]; + const tgtCenter = [tgt.x + tgt.width/2, tgt.y + tgt.height/2]; + const distance = Math.sqrt( + Math.pow(detCenter[0] - tgtCenter[0], 2) + + Math.pow(detCenter[1] - tgtCenter[1], 2) + ); + console.log(` 配对${i + 1}: IoU=${match.iou.toFixed(3)}, 中心距离=${distance.toFixed(1)}px`); + console.log(` 检测框: (${det.x}, ${det.y}) ${det.width}×${det.height}`); + console.log(` 目标框: (${tgt.x}, ${tgt.y}) ${tgt.width}×${tgt.height}`); + }); + } + + // 显示漏检的目标 + if (result.matchedCount < result.totalTargets) { + const matched = result.matches.map(m => m.target); + const unmatched = targetBoxes.filter(t => + !matched.some(m => m.x === t.x && m.y === t.y && m.width === t.width && m.height === t.height) + ); + console.log(` ⚠️ 漏检 ${unmatched.length} 个目标:`); + unmatched.forEach((box, i) => { + console.log(` 目标${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); + + // 找最接近的检测框 + if (detectedValidatorBoxes.length > 0) { + let minDist = Infinity; + let closest = null; + for (const det of detectedValidatorBoxes) { + const detCenter = [det.x + det.width/2, det.y + det.height/2]; + const tgtCenter = [box.x + box.width/2, box.y + box.height/2]; + const dist = Math.sqrt( + Math.pow(detCenter[0] - tgtCenter[0], 2) + + Math.pow(detCenter[1] - tgtCenter[1], 2) + ); + if (dist < minDist) { + minDist = dist; + closest = det; + } + } + if (closest) { + console.log(` 最接近检测框: (${closest.x}, ${closest.y}) ${closest.width}×${closest.height}, 距离=${minDist.toFixed(1)}px`); + } + } + }); + } + + // 显示误检 + if (result.unmatched.length > 0) { + console.log(` ⚠️ 误检 ${result.unmatched.length} 个:`); + result.unmatched.forEach((box, i) => { + console.log(` 误检${i + 1}: (${box.x}, ${box.y}) ${box.width}×${box.height}, center=(${box.x + box.width/2}, ${box.y + box.height/2})`); + }); + } + + console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%`); + console.log(` 召回率: ${(result.recall * 100).toFixed(1)}%`); + } +} + +main().catch(console.error); diff --git a/src/debug-single.ts b/src/debug-single.ts new file mode 100644 index 0000000..c01af50 --- /dev/null +++ b/src/debug-single.ts @@ -0,0 +1,102 @@ +import sharp from 'sharp'; +import * as path from 'path'; + +async function debugImage() { + const imagePath = path.join(__dirname, '..', 'images', 'douban', '滑块.png'); + + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + console.log(`图片尺寸: ${width}×${height}`); + console.log('\n=== 测试不同阈值 ===\n'); + + // 测试不同的暗色阈值 + for (const threshold of [60, 80, 100, 120, 140, 160, 180, 200]) { + let darkCount = 0; + const regions: Array<{x: number, y: number, count: number}> = []; + + // 粗略统计 + for (let y = 0; y < height; y += 10) { + for (let x = 0; x < width; x += 10) { + let localDark = 0; + for (let dy = 0; dy < 10 && y + dy < height; dy++) { + for (let dx = 0; dx < 10 && x + dx < width; dx++) { + const idx = ((y + dy) * width + (x + dx)) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = r * 0.299 + g * 0.587 + b * 0.114; + + if (brightness < threshold) { + darkCount++; + localDark++; + } + } + } + + if (localDark > 50) { + regions.push({x, y, count: localDark}); + } + } + } + + const darkRatio = (darkCount / (width * height / 100) * 100).toFixed(1); + console.log(`阈值 < ${threshold}: 暗像素比例 ${darkRatio}%, 暗色区域数: ${regions.length}`); + + if (regions.length > 0 && regions.length < 10) { + regions.sort((a, b) => b.count - a.count); + console.log(` 主要暗色区域:`); + regions.slice(0, 3).forEach((r, i) => { + console.log(` 区域${i + 1}: (${r.x}, ${r.y}), 密度: ${r.count}`); + }); + } + } + + console.log('\n=== 测试白色阈值 ===\n'); + + // 测试白色阈值 + for (const threshold of [130, 150, 170, 190, 210]) { + let whiteCount = 0; + const regions: Array<{x: number, y: number, count: number}> = []; + + for (let y = 0; y < height; y += 10) { + for (let x = 0; x < width; x += 10) { + let localWhite = 0; + for (let dy = 0; dy < 10 && y + dy < height; dy++) { + for (let dx = 0; dx < 10 && x + dx < width; dx++) { + const idx = ((y + dy) * width + (x + dx)) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = r * 0.299 + g * 0.587 + b * 0.114; + + if (brightness > threshold && Math.abs(r - g) < 60 && Math.abs(g - b) < 60) { + whiteCount++; + localWhite++; + } + } + } + + if (localWhite > 50) { + regions.push({x, y, count: localWhite}); + } + } + } + + const whiteRatio = (whiteCount / (width * height / 100) * 100).toFixed(1); + console.log(`阈值 > ${threshold}: 白像素比例 ${whiteRatio}%, 白色区域数: ${regions.length}`); + + if (regions.length > 0 && regions.length < 15) { + regions.sort((a, b) => b.count - a.count); + console.log(` 主要白色区域:`); + regions.slice(0, 5).forEach((r, i) => { + console.log(` 区域${i + 1}: (${r.x}, ${r.y}), 密度: ${r.count}`); + }); + } + } +} + +debugImage().catch(console.error); diff --git a/src/debug-threshold.ts b/src/debug-threshold.ts new file mode 100644 index 0000000..7438915 --- /dev/null +++ b/src/debug-threshold.ts @@ -0,0 +1,50 @@ +import sharp from 'sharp'; +import * as path from 'path'; + +async function debugImage(imagePath: string) { + const basename = path.basename(imagePath); + console.log(`\n=== 调试: ${basename} ===`); + + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 使用不同的阈值测试 + for (const threshold of [70, 80, 90, 100, 110]) { + const darkMap = new Uint8Array(width * height); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = (r * 0.299 + g * 0.587 + b * 0.114); + + darkMap[y * width + x] = brightness < threshold ? 1 : 0; + } + } + + // 简单统计暗像素数量 + let darkCount = 0; + for (let i = 0; i < darkMap.length; i++) { + if (darkMap[i] === 1) darkCount++; + } + + console.log(`阈值 ${threshold}: 暗像素 ${darkCount} (${(darkCount / darkMap.length * 100).toFixed(1)}%)`); + } +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + + await debugImage(path.join(doubanDir, '滑块-2.png')); + await debugImage(path.join(doubanDir, '滑块-3.png')); + await debugImage(path.join(doubanDir, '滑块.png')); + await debugImage(path.join(doubanDir, '滑块-6.png')); +} + +main().catch(console.error); diff --git a/src/debug.ts b/src/debug.ts new file mode 100644 index 0000000..22a6298 --- /dev/null +++ b/src/debug.ts @@ -0,0 +1,82 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +async function analyzeImage(imagePath: string) { + console.log(`\n分析图片: ${path.basename(imagePath)}`); + + const metadata = await sharp(imagePath).metadata(); + console.log(`尺寸: ${metadata.width}x${metadata.height}`); + console.log(`通道数: ${metadata.channels}`); + console.log(`颜色空间: ${metadata.space}`); + + // 分析颜色分布 + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 统计不同颜色区域 + let darkPixels = 0; + let brightPixels = 0; + let colorfulPixels = 0; + + for (let i = 0; i < data.length; i += channels) { + const r = data[i]; + const g = data[i + 1]; + const b = data[i + 2]; + const avg = (r + g + b) / 3; + + if (avg < 50) darkPixels++; + else if (avg > 200) brightPixels++; + + const colorVariance = Math.abs(r - g) + Math.abs(g - b) + Math.abs(b - r); + if (colorVariance > 30) colorfulPixels++; + } + + const totalPixels = (data.length / channels); + console.log(`暗像素: ${(darkPixels / totalPixels * 100).toFixed(1)}%`); + console.log(`亮像素: ${(brightPixels / totalPixels * 100).toFixed(1)}%`); + console.log(`彩色像素: ${(colorfulPixels / totalPixels * 100).toFixed(1)}%`); + + // 生成调试图像 - 边缘检测结果 + const debugDir = path.join(path.dirname(imagePath), '..', 'debug'); + if (!fs.existsSync(debugDir)) { + fs.mkdirSync(debugDir, { recursive: true }); + } + + const basename = path.basename(imagePath, '.png'); + + // 保存灰度图 + await sharp(imagePath) + .greyscale() + .toFile(path.join(debugDir, `${basename}_gray.png`)); + + // 保存边缘检测结果 + await sharp(imagePath) + .greyscale() + .normalize() + .convolve({ + width: 3, + height: 3, + kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1] + }) + .toFile(path.join(debugDir, `${basename}_edge.png`)); + + console.log(`调试图像已保存到: ${debugDir}`); +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + + // 分析第一张图片 + const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')).slice(0, 3); + + for (const file of files) { + await analyzeImage(path.join(doubanDir, file)); + } +} + +main().catch(console.error); diff --git a/src/detector-cv.ts b/src/detector-cv.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/detector-edge.ts b/src/detector-edge.ts new file mode 100644 index 0000000..75dafe2 --- /dev/null +++ b/src/detector-edge.ts @@ -0,0 +1,413 @@ +import sharp from 'sharp'; + +export interface BoundingBox { + x: number; + y: number; + width: number; + height: number; + score: number; +} + +interface DetectOptions { + downscaleWidth?: number; + expectedWidth?: number; + expectedHeight?: number; + widthTolerance?: number; + heightTolerance?: number; + maxCandidates?: number; +} + +const DEFAULT_EXPECTED_WIDTH = 470; +const DEFAULT_EXPECTED_HEIGHT = 110; +const DEFAULT_TOLERANCE = 0.35; +const DEFAULT_MAX_CANDIDATES = 6; +const CLAMP_EPSILON = 1e-6; + +/** + * 基于梯度能量的滑块检测器,针对豆瓣滑块的长条形状做了定制优化。 + * 算法要点: + * 1. 对整图按固定宽度缩放,保证不同图像的尺度一致。 + * 2. 使用垂直梯度能量(行差分)定位滑块上、下边缘。 + * 3. 在候选上下边界之间利用水平梯度能量(列差分)寻找左右边缘。 + * 4. 结合期望宽高与对比度评分筛选最优候选。 + */ +export class EdgeSliderDetector { + async detectSlider( + imagePath: string, + outputPath?: string, + detectMultiple: boolean = false, + options: DetectOptions = {} + ): Promise { + const { + downscaleWidth = 512, + expectedWidth = DEFAULT_EXPECTED_WIDTH, + expectedHeight = DEFAULT_EXPECTED_HEIGHT, + widthTolerance = DEFAULT_TOLERANCE, + heightTolerance = DEFAULT_TOLERANCE, + maxCandidates = DEFAULT_MAX_CANDIDATES, + } = options; + + const metadata = await sharp(imagePath).metadata(); + if (!metadata.width || !metadata.height) { + throw new Error(`无法读取图片尺寸: ${imagePath}`); + } + + const scale = + metadata.width > downscaleWidth ? downscaleWidth / metadata.width : 1; + const resized = await sharp(imagePath) + .resize({ width: Math.max(1, Math.round(metadata.width * scale)) }) + .greyscale() + .raw() + .toBuffer({ resolveWithObject: true }); + + const { data, info } = resized; + const scaledWidth = info.width; + const scaledHeight = info.height; + + const rowEnergy = this.computeRowGradient(data, scaledWidth, scaledHeight); + const smoothRow = this.smooth(rowEnergy, 9); + + const verticalBand = this.locateVerticalBand( + data, + smoothRow, + scaledWidth, + scaledHeight, + expectedHeight * scale, + heightTolerance, + maxCandidates + ); + + if (!verticalBand) { + return detectMultiple ? [] : null; + } + + const colEnergy = this.computeColumnGradient( + data, + scaledWidth, + scaledHeight, + verticalBand.top, + verticalBand.bottom + ); + const smoothCol = this.smooth(colEnergy, 9); + + const horizontalSpan = this.locateHorizontalSpan( + data, + smoothCol, + scaledWidth, + verticalBand, + expectedWidth * scale, + widthTolerance, + maxCandidates + ); + + if (!horizontalSpan) { + return detectMultiple ? [] : null; + } + + const scaledBox: BoundingBox = { + x: horizontalSpan.left, + y: verticalBand.top, + width: horizontalSpan.right - horizontalSpan.left + 1, + height: verticalBand.bottom - verticalBand.top + 1, + score: verticalBand.score + horizontalSpan.score, + }; + + const box = this.toOriginalBox(scaledBox, scale, metadata.width, metadata.height); + + if (outputPath) { + await this.drawBoxes(imagePath, [box], outputPath); + } + + if (detectMultiple) { + return [box]; + } + return box; + } + + private computeRowGradient(data: Buffer, width: number, height: number): Float32Array { + const grad = new Float32Array(height); + for (let y = 0; y < height - 1; y += 1) { + let sum = 0; + const row = y * width; + const nextRow = (y + 1) * width; + for (let x = 0; x < width; x += 1) { + sum += Math.abs(data[nextRow + x] - data[row + x]); + } + grad[y] = sum / (width + CLAMP_EPSILON); + } + return grad; + } + + private computeColumnGradient( + data: Buffer, + width: number, + height: number, + top: number, + bottom: number + ): Float32Array { + const grad = new Float32Array(width); + const bandHeight = Math.max(1, bottom - top + 1); + for (let x = 0; x < width - 1; x += 1) { + let sum = 0; + for (let y = top; y <= bottom; y += 1) { + const idx = y * width + x; + sum += Math.abs(data[idx + 1] - data[idx]); + } + grad[x] = sum / (bandHeight + CLAMP_EPSILON); + } + return grad; + } + + private smooth(values: Float32Array, window: number): Float32Array { + if (window <= 1) return Float32Array.from(values); + const result = new Float32Array(values.length); + const radius = Math.max(1, Math.floor(window / 2)); + for (let i = 0; i < values.length; i += 1) { + let sum = 0; + let count = 0; + for (let offset = -radius; offset <= radius; offset += 1) { + const idx = i + offset; + if (idx >= 0 && idx < values.length) { + sum += values[idx]; + count += 1; + } + } + result[i] = count > 0 ? sum / count : values[i]; + } + return result; + } + + private locateVerticalBand( + data: Buffer, + rowEnergy: Float32Array, + width: number, + height: number, + expectedHeight: number, + tolerance: number, + maxCandidates: number + ): { top: number; bottom: number; score: number } | null { + const searchStart = Math.floor(height * 0.15); + const searchEnd = Math.max(searchStart + 10, Math.floor(height * 0.95)); + const minHeight = Math.max(20, Math.floor(expectedHeight * (1 - tolerance))); + const maxHeight = Math.max(minHeight + 10, Math.floor(expectedHeight * (1 + tolerance))); + + const topCandidates = this.topIndices(rowEnergy, searchStart, searchEnd, maxCandidates); + if (topCandidates.length === 0) { + return null; + } + + let best: { top: number; bottom: number; score: number } | null = null; + + for (const top of topCandidates) { + const bottomStart = Math.min(height - 2, top + minHeight); + const bottomEnd = Math.min(height - 2, top + maxHeight); + if (bottomEnd <= bottomStart) continue; + + const bottom = this.maxIndex(rowEnergy, bottomStart, bottomEnd); + const bandScore = this.bandContrast(data, width, height, top, bottom); + + const actualHeight = bottom - top; + const expectedPenalty = Math.abs(actualHeight - expectedHeight); + const heightScore = Math.max(0, 1 - expectedPenalty / (expectedHeight * tolerance + 1)); + const score = (rowEnergy[top] + rowEnergy[bottom]) * (0.6 + 0.4 * heightScore) + bandScore * 0.6; + + if (!best || score > best.score) { + best = { top: top, bottom: bottom, score }; + } + } + + return best; + } + + private locateHorizontalSpan( + data: Buffer, + colEnergy: Float32Array, + width: number, + band: { top: number; bottom: number }, + expectedWidth: number, + tolerance: number, + maxCandidates: number + ): { left: number; right: number; score: number } | null { + const minWidth = Math.max(30, Math.floor(expectedWidth * (1 - tolerance))); + const maxWidth = Math.max(minWidth + 20, Math.floor(expectedWidth * (1 + tolerance))); + + const leftCandidates = this.topIndices(colEnergy, 2, width - 3, maxCandidates); + if (leftCandidates.length === 0) { + return null; + } + + let best: { left: number; right: number; score: number } | null = null; + + for (const left of leftCandidates) { + const rightStart = Math.min(width - 3, left + minWidth); + const rightEnd = Math.min(width - 3, left + maxWidth); + if (rightEnd <= rightStart) continue; + + const right = this.maxIndex(colEnergy, rightStart, rightEnd); + const actualWidth = right - left; + if (actualWidth < minWidth || actualWidth > maxWidth) { + continue; + } + + const contrastScore = this.bandContrastColumns(data, width, band.top, band.bottom, left, right); + const widthPenalty = Math.abs(actualWidth - expectedWidth); + const widthScore = Math.max(0, 1 - widthPenalty / (expectedWidth * tolerance + 1)); + const score = + (colEnergy[left] + colEnergy[right]) * (0.6 + 0.4 * widthScore) + contrastScore * 0.4; + + if (!best || score > best.score) { + best = { left, right, score }; + } + } + + return best; + } + + private topIndices( + values: Float32Array, + start: number, + end: number, + maxCount: number + ): number[] { + const pairs: Array<{ index: number; value: number }> = []; + for (let i = start; i < end && i < values.length; i += 1) { + pairs.push({ index: i, value: values[i] }); + } + pairs.sort((a, b) => b.value - a.value); + return pairs.slice(0, maxCount).map(item => item.index); + } + + private maxIndex(values: Float32Array, start: number, end: number): number { + let bestIdx = start; + let bestVal = values[start]; + for (let i = start + 1; i <= end && i < values.length; i += 1) { + if (values[i] > bestVal) { + bestVal = values[i]; + bestIdx = i; + } + } + return bestIdx; + } + + private bandContrast(data: Buffer, width: number, height: number, top: number, bottom: number): number { + const innerMean = this.meanRows(data, width, height, top, bottom); + const topMean = this.meanRows(data, width, height, Math.max(0, top - 12), Math.max(0, top - 1)); + const bottomMean = this.meanRows( + data, + width, + height, + Math.min(height - 1, bottom + 1), + Math.min(height - 1, bottom + 12) + ); + const outsideMean = (topMean + bottomMean) / 2; + return Math.abs(innerMean - outsideMean); + } + + private bandContrastColumns( + data: Buffer, + width: number, + top: number, + bottom: number, + left: number, + right: number + ): number { + const height = Math.floor(data.length / width); + const innerMean = this.meanColumns(data, width, height, top, bottom, left, right); + const leftMean = this.meanColumns( + data, + width, + height, + top, + bottom, + Math.max(0, left - 20), + Math.max(left - 2, left - 1) + ); + const rightMean = this.meanColumns( + data, + width, + height, + top, + bottom, + Math.min(width - 1, right + 1), + Math.min(width - 1, right + 20) + ); + const outsideMean = (leftMean + rightMean) / 2; + return Math.abs(innerMean - outsideMean); + } + + private meanRows( + data: Buffer, + width: number, + height: number, + startRow: number, + endRow: number + ): number { + const s = Math.max(0, Math.min(startRow, height - 1)); + const e = Math.max(s, Math.min(endRow, height - 1)); + let sum = 0; + let count = 0; + for (let y = s; y <= e; y += 1) { + const rowOffset = y * width; + for (let x = 0; x < width; x += 1) { + sum += data[rowOffset + x]; + } + count += width; + } + return count > 0 ? sum / count : 0; + } + + private meanColumns( + data: Buffer, + width: number, + height: number, + top: number, + bottom: number, + startCol: number, + endCol: number + ): number { + const topClamped = Math.max(0, Math.min(top, height - 1)); + const bottomClamped = Math.max(topClamped, Math.min(bottom, height - 1)); + const s = Math.max(0, startCol); + const e = Math.max(s, Math.min(endCol, width - 1)); + let sum = 0; + let count = 0; + for (let x = s; x <= e; x += 1) { + for (let y = topClamped; y <= bottomClamped; y += 1) { + sum += data[y * width + x]; + } + count += bottomClamped - topClamped + 1; + } + return count > 0 ? sum / count : 0; + } + + private toOriginalBox(box: BoundingBox, scale: number, width: number, height: number): BoundingBox { + const inv = scale === 0 ? 1 : 1 / scale; + const x = Math.round(box.x * inv); + const y = Math.round(box.y * inv); + const w = Math.round(box.width * inv); + const h = Math.round(box.height * inv); + return { + x: Math.max(0, Math.min(x, width - 1)), + y: Math.max(0, Math.min(y, height - 1)), + width: Math.max(1, Math.min(w, width - x)), + height: Math.max(1, Math.min(h, height - y)), + score: box.score, + }; + } + + private async drawBoxes(imagePath: string, boxes: BoundingBox[], outputPath: string) { + const image = sharp(imagePath); + const metadata = await image.metadata(); + const svgBoxes = boxes + .map(box => { + return ``; + }) + .join('\n'); + + const svg = `${svgBoxes}`; + + await image + .composite([{ input: Buffer.from(svg), top: 0, left: 0 }]) + .toFile(outputPath); + } +} diff --git a/src/detector-self-learning.ts b/src/detector-self-learning.ts new file mode 100644 index 0000000..a604c95 --- /dev/null +++ b/src/detector-self-learning.ts @@ -0,0 +1,146 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; +import { BoundingBox } from './detector'; + +async function matchTemplate( + image: sharp.Sharp, + template: sharp.Sharp, + searchArea: { x: number; y: number; width: number; height: number }, + excludeBox?: BoundingBox +): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> { + const { data: imageBuffer, info: imageInfo } = await image + .raw() + .toBuffer({ resolveWithObject: true }); + const { data: templateBuffer, info: templateInfo } = await template + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo; + const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo; + + if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) { + throw new Error('Image or template dimensions are invalid.'); + } + + let maxVal = -Infinity; + let maxLoc = { x: 0, y: 0 }; + + const startY = Math.max(0, searchArea.y); + const endY = Math.min(imageHeight - templateHeight, searchArea.y + searchArea.height); + const startX = Math.max(0, searchArea.x); + const endX = Math.min(imageWidth - templateWidth, searchArea.x + searchArea.width); + + for (let y = startY; y < endY; y++) { + for (let x = startX; x < endX; x++) { + // Exclude the original box area from matching by checking for significant overlap + if (excludeBox) { + const x_overlap = Math.max(0, Math.min(x + templateWidth, excludeBox.x + excludeBox.width) - Math.max(x, excludeBox.x)); + const y_overlap = Math.max(0, Math.min(y + templateHeight, excludeBox.y + excludeBox.height) - Math.max(y, excludeBox.y)); + const overlapArea = x_overlap * y_overlap; + if (overlapArea / (templateWidth * templateHeight) > 0.5) { + continue; + } + } + + let sumC = 0, sumT2 = 0, sumI2 = 0; + + for (let ty = 0; ty < templateHeight; ty++) { + for (let tx = 0; tx < templateWidth; tx++) { + const imageY = y + ty; + const imageX = x + tx; + + const imageIdx = (imageY * imageWidth + imageX) * imageChannels; + const templateIdx = (ty * templateWidth + tx) * templateChannels; + + const imageVal = imageBuffer[imageIdx]; + const templateVal = templateBuffer[templateIdx]; + + sumC += imageVal * templateVal; + sumT2 += templateVal * templateVal; + sumI2 += imageVal * imageVal; + } + } + + const denominator = Math.sqrt(sumT2 * sumI2); + const val = denominator === 0 ? 0 : sumC / denominator; + + if (val > maxVal) { + maxVal = val; + maxLoc = { x, y }; + } + } + } + + return { maxVal, maxLoc }; +} + +export class SelfLearningSliderDetector { + private async cannyEdge(image: sharp.Sharp): Promise { + return image + .grayscale() + .raw() + .toBuffer({ resolveWithObject: true }) + .then(({ data, info }) => { + const sobelData = Buffer.alloc(info.width * info.height); + for (let y = 1; y < info.height - 1; y++) { + for (let x = 1; x < info.width - 1; x++) { + const Gx = -data[(y - 1) * info.width + x - 1] - 2 * data[y * info.width + x - 1] - data[(y + 1) * info.width + x - 1] + data[(y - 1) * info.width + x + 1] + 2 * data[y * info.width + x + 1] + data[(y + 1) * info.width + x + 1]; + const Gy = -data[(y - 1) * info.width + x - 1] - 2 * data[(y - 1) * info.width + x] - data[(y - 1) * info.width + x + 1] + data[(y + 1) * info.width + x - 1] + 2 * data[(y + 1) * info.width + x] + data[(y + 1) * info.width + x + 1]; + const magnitude = Math.sqrt(Gx * Gx + Gy * Gy); + sobelData[y * info.width + x] = magnitude > 50 ? 255 : 0; + } + } + return sharp(sobelData, { raw: { width: info.width, height: info.height, channels: 1 } }); + }); + } + + public async detectSecondSlider(imagePath: string, seedBox: BoundingBox): Promise { + try { + const image = sharp(imagePath); + const { width: imageWidth, height: imageHeight } = await image.metadata(); + + if (!imageWidth || !imageHeight) return null; + + const template = image.clone().extract({ + left: seedBox.x, + top: seedBox.y, + width: seedBox.width, + height: seedBox.height, + }); + + const debugDir = path.join(__dirname, '..', 'images', 'debug'); + if (!fs.existsSync(debugDir)) fs.mkdirSync(debugDir, { recursive: true }); + const templateFileName = `template-${path.basename(imagePath)}`; + await template.toFile(path.join(debugDir, templateFileName)); + console.log(` [SelfLearning] Saved refined template to: ${templateFileName}`); + + const imageEdge = await this.cannyEdge(image); + const templateEdge = await this.cannyEdge(template); + + const searchArea = { + x: 0, + y: Math.max(0, seedBox.y - 20), + width: imageWidth, + height: seedBox.height + 40, + }; + + const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge, searchArea, seedBox); + console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)}`); + + if (maxVal > 0.3) { // Increased threshold for higher confidence + return { + x: maxLoc.x, + y: maxLoc.y, + width: seedBox.width, + height: seedBox.height, + score: maxVal, + }; + } + return null; + } catch (error) { + console.error(`Error during self-learning detection for ${imagePath}:`, error); + return null; + } + } +} diff --git a/src/detector-template.ts b/src/detector-template.ts new file mode 100644 index 0000000..bf10041 --- /dev/null +++ b/src/detector-template.ts @@ -0,0 +1,134 @@ +import sharp from 'sharp'; +import { BoundingBox } from './detector'; + +// TM_CCOEFF_NORMED: 归一化相关系数匹配。对于光照变化不敏感,效果较好。 +async function matchTemplate( + image: sharp.Sharp, + template: sharp.Sharp +): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> { + const { data: imageBuffer, info: imageInfo } = await image + .raw() + .toBuffer({ resolveWithObject: true }); + const { data: templateBuffer, info: templateInfo } = await template + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo; + const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo; + + if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) { + throw new Error('Image or template dimensions are invalid.'); + } + + let maxVal = -Infinity; + let maxLoc = { x: 0, y: 0 }; + + const resultWidth = imageWidth - templateWidth + 1; + const resultHeight = imageHeight - templateHeight + 1; + + for (let y = 0; y < resultHeight; y++) { + for (let x = 0; x < resultWidth; x++) { + let sumC = 0; + let sumT2 = 0; + let sumI2 = 0; + + for (let ty = 0; ty < templateHeight; ty++) { + for (let tx = 0; tx < templateWidth; tx++) { + const imageY = y + ty; + const imageX = x + tx; + + const imageIdx = (imageY * imageWidth + imageX) * imageChannels; + const templateIdx = (ty * templateWidth + tx) * templateChannels; + + // For Canny edge images, we only need one channel + const imageVal = imageBuffer[imageIdx]; + const templateVal = templateBuffer[templateIdx]; + + sumC += imageVal * templateVal; + sumT2 += templateVal * templateVal; + sumI2 += imageVal * imageVal; + } + } + + const denominator = Math.sqrt(sumT2 * sumI2); + const val = denominator === 0 ? 0 : sumC / denominator; + + if (val > maxVal) { + maxVal = val; + maxLoc = { x, y }; + } + } + } + + return { maxVal, maxLoc }; +} + +export class TemplateSliderDetector { + private async cannyEdge(image: sharp.Sharp): Promise { + // A simplified Canny implementation for template matching + return image + .grayscale() + .convolve({ // Gaussian blur + width: 5, + height: 5, + kernel: [ + 1, 4, 7, 4, 1, + 4, 16, 26, 16, 4, + 7, 26, 41, 26, 7, + 4, 16, 26, 16, 4, + 1, 4, 7, 4, 1, + ], + scale: 273, + }) + .raw() + .toBuffer({ resolveWithObject: true }) + .then(({ data, info }) => { + // Sobel edge detection (simplified) + const sobelData = Buffer.alloc(info.width * info.height); + for (let y = 1; y < info.height - 1; y++) { + for (let x = 1; x < info.width - 1; x++) { + const Gx = + -data[(y - 1) * info.width + x - 1] - 2 * data[y * info.width + x - 1] - data[(y + 1) * info.width + x - 1] + + data[(y - 1) * info.width + x + 1] + 2 * data[y * info.width + x + 1] + data[(y + 1) * info.width + x + 1]; + const Gy = + -data[(y - 1) * info.width + x - 1] - 2 * data[(y - 1) * info.width + x] - data[(y - 1) * info.width + x + 1] + + data[(y + 1) * info.width + x - 1] + 2 * data[(y + 1) * info.width + x] + data[(y + 1) * info.width + x + 1]; + + const magnitude = Math.sqrt(Gx * Gx + Gy * Gy); + sobelData[y * info.width + x] = magnitude > 50 ? 255 : 0; // Threshold + } + } + return sharp(sobelData, { raw: { width: info.width, height: info.height, channels: 1 } }); + }); + } + + public async detect(imagePath: string, templatePath: string): Promise { + try { + const image = sharp(imagePath); + const template = sharp(templatePath); + + const imageEdge = await this.cannyEdge(image); + const templateEdge = await this.cannyEdge(template); + + const { width: templateWidth, height: templateHeight } = await template.metadata(); + + const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge); + + console.log(` Template: ${templatePath}, Score: ${maxVal.toFixed(4)} at (${maxLoc.x}, ${maxLoc.y})`); + + if (maxVal > 0.3) { // Correlation threshold + return { + x: maxLoc.x, + y: maxLoc.y, + width: templateWidth || 0, + height: templateHeight || 0, + score: maxVal, + }; + } + return null; + } catch (error) { + console.error(`Error during template matching for ${imagePath}:`, error); + return null; + } + } +} diff --git a/src/detector.ts b/src/detector.ts new file mode 100644 index 0000000..a28837d --- /dev/null +++ b/src/detector.ts @@ -0,0 +1,1092 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; +import { SliderValidator, BoundingBox as ValidatorBox } from './validator'; +import { SelfLearningSliderDetector } from './detector-self-learning'; + +export interface BoundingBox { + x: number; + y: number; + width: number; + height: number; + score: number; +} + +export class SliderDetector { + /** + * 检测滑块位置 - 采用新的自学习流程 + * @param imagePath 图像路径 + * @param outputPath 输出路径 + * @param detectMultiple 是否检测多个滑块(在此实现中,总是尝试检测多个) + */ + async detectSlider( + imagePath: string, + outputPath?: string, + detectMultiple: boolean = true + ): Promise { + try { + const image = sharp(imagePath); + const initialCandidates = await this.findInitialCandidates(imagePath); + + if (initialCandidates.length === 0) { + console.log(` [Detector] No initial candidates found for ${path.basename(imagePath)}.`); + return null; + } + + // 选择分数最高的候选框作为种子 + const bestCandidate = initialCandidates.reduce((prev, current) => + (prev.score > current.score) ? prev : current + ); + + console.log(` [Detector] Best initial candidate for ${path.basename(imagePath)} at x: ${bestCandidate.x}, y: ${bestCandidate.y} (score: ${bestCandidate.score.toFixed(3)})`); + + // 精炼最佳候选框 + const edgeImage = await this.cannyEdge(image); + const refinedSeedBox = await this.refineBox(bestCandidate, edgeImage); + console.log(` [Detector] Refined seed box to x: ${refinedSeedBox.x}, y: ${refinedSeedBox.y}, w: ${refinedSeedBox.width}, h: ${refinedSeedBox.height}`); + + + // 使用自学习检测器寻找第二个滑块 + const selfLearningDetector = new SelfLearningSliderDetector(); + const secondSlider = await selfLearningDetector.detectSecondSlider(imagePath, refinedSeedBox); + + const finalDetections = [refinedSeedBox]; + if (secondSlider) { + // 验证第二个滑块是否与第一个重叠过多 + const iou = this.calculateIoU(refinedSeedBox, secondSlider); + if (iou < 0.5) { + console.log(` [Detector] Found second slider at x: ${secondSlider.x}, y: ${secondSlider.y} with score ${secondSlider.score.toFixed(3)}`); + finalDetections.push(secondSlider); + } else { + console.log(` [Detector] Discarded second slider due to high overlap (IoU: ${iou.toFixed(3)}).`); + } + } else { + console.log(` [Detector] Self-learning detector did not find a second slider.`); + } + + if (outputPath) { + await this.drawBoundingBoxes(imagePath, finalDetections, outputPath, 'blue'); + } + + return finalDetections; + } catch (error) { + console.error(`Error in new detectSlider for ${imagePath}:`, error); + return null; + } + } + + /** + * Canny边缘检测 + */ + private async cannyEdge(image: sharp.Sharp): Promise { + return image + .grayscale() + .raw() + .toBuffer({ resolveWithObject: true }) + .then(({ data, info }) => { + const sobelData = Buffer.alloc(info.width * info.height); + for (let y = 1; y < info.height - 1; y++) { + for (let x = 1; x < info.width - 1; x++) { + const Gx = -data[(y - 1) * info.width + x - 1] - 2 * data[y * info.width + x - 1] - data[(y + 1) * info.width + x - 1] + data[(y - 1) * info.width + x + 1] + 2 * data[y * info.width + x + 1] + data[(y + 1) * info.width + x + 1]; + const Gy = -data[(y - 1) * info.width + x - 1] - 2 * data[(y - 1) * info.width + x] - data[(y - 1) * info.width + x + 1] + data[(y + 1) * info.width + x - 1] + 2 * data[(y + 1) * info.width + x] + data[(y + 1) * info.width + x + 1]; + const magnitude = Math.sqrt(Gx * Gx + Gy * Gy); + sobelData[y * info.width + x] = magnitude > 50 ? 255 : 0; + } + } + return sharp(sobelData, { raw: { width: info.width, height: info.height, channels: 1 } }); + }); + } + + /** + * 利用边缘投影精确裁剪边界框 + */ + private async refineBox(box: BoundingBox, edgeImage: sharp.Sharp): Promise { + try { + const { data, info } = await edgeImage + .clone() + .extract({ left: box.x, top: box.y, width: box.width, height: box.height }) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height } = info; + + const projX = new Array(width).fill(0); + const projY = new Array(height).fill(0); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const pixel = data[y * width + x]; + if (pixel > 0) { + projX[x]++; + projY[y]++; + } + } + } + + const findBounds = (proj: number[], minThreshold: number = 2): { start: number; end: number } => { + let start = -1, end = -1; + + // Find first and last edge pixels + for (let i = 0; i < proj.length; i++) { + if (proj[i] >= minThreshold) { + if (start === -1) start = i; + end = i; + } + } + + if (start === -1) return { start: 0, end: proj.length - 1 }; // No edges found, return original + + // Refine start: move inwards from the first found edge + let bestStart = start; + for (let i = start; i < Math.min(proj.length, start + 10); i++) { + if (proj[i] >= minThreshold) { + bestStart = i; + break; + } + } + + // Refine end: move inwards from the last found edge + let bestEnd = end; + for (let i = end; i >= Math.max(0, end - 10); i--) { + if (proj[i] >= minThreshold) { + bestEnd = i; + break; + } + } + + return { start: bestStart, end: bestEnd }; + }; + + const { start: xStart, end: xEnd } = findBounds(projX); + const { start: yStart, end: yEnd } = findBounds(projY); + + const newX = box.x + xStart; + const newY = box.y + yStart; + const newWidth = xEnd - xStart + 1; + const newHeight = yEnd - yStart + 1; + + // Basic sanity check for refinement + if (newWidth <= 10 || newHeight <= 10 || newWidth > box.width * 1.2 || newHeight > box.height * 1.2) { + return box; // Return original if refinement is unreasonable + } + + return { + x: newX, + y: newY, + width: newWidth, + height: newHeight, + score: box.score, + }; + } catch (e) { + console.error(` [RefineBox] Error refining box, returning original.`, e); + return box; + } + } + + /** + * 初始候选查找:融合亮度、边缘检测 + */ + private async findInitialCandidates(imagePath: string): Promise { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 策略A:白色+暗色混合检测 + const mixedBoxes = this.detectDarkRegions(data, width, height, channels, true); + + // 策略B:简化的边缘检测 + const edgeBoxes = this.detectByEdges(data, width, height, channels); + + // 合并结果 + let allBoxes = [...mixedBoxes]; + for (const eb of edgeBoxes) { + if (!allBoxes.some(mb => this.calculateIoU(mb, eb) > 0.5)) { + allBoxes.push(eb); + } + } + + // 过滤和排序 + allBoxes = allBoxes.filter(candidate => { + const aspectRatio = candidate.width / candidate.height; + const sizeDiff = Math.abs(candidate.width - candidate.height); + return aspectRatio >= 0.85 && aspectRatio <= 1.18 && sizeDiff <= 20; + }); + + allBoxes.sort((a, b) => { + const scoreA = (1 - Math.abs(a.width / a.height - 1)) + a.score / (a.width * a.height); + const scoreB = (1 - Math.abs(b.width / b.height - 1)) + b.score / (b.width * b.height); + return scoreB - scoreA; + }); + + // 去重 + const uniqueBoxes: BoundingBox[] = []; + for (const box of allBoxes) { + if (!uniqueBoxes.some(ub => this.calculateIoU(ub, box) > 0.5)) { + uniqueBoxes.push(box); + } + } + + return uniqueBoxes; + } + + /** + * 简化的边缘检测策略(基于Sobel算子) + */ + private detectByEdges(data: Buffer, width: number, height: number, channels: number): BoundingBox[] { + // 1. 转灰度 + const gray = new Uint8Array(width * height); + for (let i = 0; i < width * height; i++) { + const idx = i * channels; + gray[i] = Math.round(data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114); + } + + // 2. Sobel边缘检测 + const edges = new Uint8Array(width * height); + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const gx = -gray[(y-1)*width + x-1] + gray[(y-1)*width + x+1] + -2*gray[y*width + x-1] + 2*gray[y*width + x+1] + -gray[(y+1)*width + x-1] + gray[(y+1)*width + x+1]; + + const gy = -gray[(y-1)*width + x-1] - 2*gray[(y-1)*width + x] - gray[(y-1)*width + x+1] + +gray[(y+1)*width + x-1] + 2*gray[(y+1)*width + x] + gray[(y+1)*width + x+1]; + + const magnitude = Math.sqrt(gx*gx + gy*gy); + edges[y*width + x] = magnitude > 40 ? 1 : 0; + } + } + + // 3. 膨胀连接边缘 + const dilated = this.dilate(edges, width, height, 4); + + // 4. 查找连通区域 + const regions = this.findDarkRegionsList(dilated, width, height); + + // 5. 筛选候选 + return this.selectBestRegions(regions, width, height, true); + } + + /** + * 验证候选区域的色调一致性(真实滑块的特征) + */ + private verifyWhiteBorder( + data: Buffer, + width: number, + height: number, + channels: number, + box: BoundingBox + ): boolean { + // 采样区域内的像素,计算色调方差 + const samples: Array<{h: number, s: number, v: number}> = []; + + // 每隔4个像素采样一次,避免计算量过大 + for (let y = box.y; y < box.y + box.height; y += 4) { + for (let x = box.x; x < box.x + box.width; x += 4) { + if (x >= width || y >= height) continue; + + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + // 转换为HSV色彩空间 + const hsv = this.rgbToHsv(r, g, b); + samples.push(hsv); + } + } + + if (samples.length < 10) return true; // 样本太少,无法判断 + + // 计算色调(H)的标准差 + const hues = samples.map(s => s.h); + const avgHue = hues.reduce((a, b) => a + b, 0) / hues.length; + + // 处理色调的循环性(0-360度) + let sumSquaredDiff = 0; + for (const h of hues) { + let diff = Math.abs(h - avgHue); + // 处理色调循环(例如 350° 和 10° 实际很接近) + if (diff > 180) diff = 360 - diff; + sumSquaredDiff += diff * diff; + } + + const hueStdDev = Math.sqrt(sumSquaredDiff / hues.length); + + // 计算饱和度(S)的标准差 + const saturations = samples.map(s => s.s); + const avgSat = saturations.reduce((a, b) => a + b, 0) / saturations.length; + const satStdDev = Math.sqrt( + saturations.reduce((sum, s) => sum + Math.pow(s - avgSat, 2), 0) / saturations.length + ); + + // 如果色调标准差小于60度,且饱和度标准差小于0.30,认为色调一致 + // 这表示区域内颜色比较统一,是真实滑块的可能性较大 + console.log(` 色调一致性 [x=${box.x}, y=${box.y}]: hueStdDev=${hueStdDev.toFixed(1)}°, satStdDev=${satStdDev.toFixed(3)}, avgSat=${avgSat.toFixed(3)}`); + return hueStdDev < 60 && satStdDev < 0.30; + } + + /** + * RGB转HSV色彩空间 + */ + private rgbToHsv(r: number, g: number, b: number): {h: number, s: number, v: number} { + r = r / 255; + g = g / 255; + b = b / 255; + + const max = Math.max(r, g, b); + const min = Math.min(r, g, b); + const delta = max - min; + + let h = 0; + let s = max === 0 ? 0 : delta / max; + let v = max; + + if (delta !== 0) { + if (max === r) { + h = 60 * (((g - b) / delta) % 6); + } else if (max === g) { + h = 60 * ((b - r) / delta + 2); + } else { + h = 60 * ((r - g) / delta + 4); + } + } + + if (h < 0) h += 360; + + return { h, s, v }; + } + + /** + * 检测暗色区域(滑块缺口) - 支持检测多个 + */ + private detectDarkRegions( + data: Buffer, + width: number, + height: number, + channels: number, + detectMultiple: boolean = false + ): BoundingBox[] { + // 混合策略:同时检测白色边缘和暗色区域 + const allCandidates: BoundingBox[] = []; + + // 策略1:检测白色边缘(滑块轮廓)- 增加更多阈值 + for (const brightThreshold of [130, 150, 170, 190, 210]) { + const whiteMap = new Uint8Array(width * height); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = (r * 0.299 + g * 0.587 + b * 0.114); + + const isWhite = brightness > brightThreshold && + Math.abs(r - g) < 60 && + Math.abs(g - b) < 60; + + whiteMap[y * width + x] = isWhite ? 1 : 0; + } + } + + const dilated = this.dilate(whiteMap, width, height, 5); + const regions = this.findDarkRegionsList(dilated, width, height); + const candidates = this.selectBestRegions(regions, width, height, true); + + for (const c of candidates) { + if (!allCandidates.some(e => this.calculateIoU(e, c) > 0.5)) { + allCandidates.push(c); + } + } + } + + // 策略2:检测暗色区域(滑块内部)- 增加更多阈值 + for (const darkThreshold of [60, 80, 100, 120, 140]) { + const darkMap = new Uint8Array(width * height); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = (r * 0.299 + g * 0.587 + b * 0.114); + + darkMap[y * width + x] = brightness < darkThreshold ? 1 : 0; + } + } + + const cleaned = this.morphologyClose(darkMap, width, height, 2); + const regions = this.findDarkRegionsList(cleaned, width, height); + const candidates = this.selectBestRegions(regions, width, height, true); + + for (const c of candidates) { + if (!allCandidates.some(e => this.calculateIoU(e, c) > 0.5)) { + allCandidates.push(c); + } + } + } + + if (allCandidates.length === 0) return []; + + // 排序并选择最佳2个 + allCandidates.sort((a, b) => { + const scoreA = Math.abs(a.width / a.height - 1) * 5 + Math.abs(a.width - 88) / 30; + const scoreB = Math.abs(b.width / b.height - 1) * 5 + Math.abs(b.width - 88) / 30; + return scoreA - scoreB; + }); + + const selected: BoundingBox[] = []; + for (const candidate of allCandidates) { + const overlaps = selected.some(s => this.calculateIoU(s, candidate) > 0.2); // 降低IoU阈值 + if (!overlaps) { + selected.push(candidate); + if (selected.length >= 3) break; // 增加到3个候选 + } + } + + // 如果检测到的数量不够,尝试放宽条件再找一次 + if (selected.length < 2) { + const relaxed = allCandidates.filter(c => + !selected.some(s => this.calculateIoU(s, c) > 0.1) && + c.width >= 65 && c.width <= 115 && + c.height >= 65 && c.height <= 115 + ); + + for (const candidate of relaxed) { + if (selected.length >= 2) break; + selected.push(candidate); + } + } + + return detectMultiple ? selected.slice(0, 2) : (selected.length > 0 ? [selected[0]] : []); + } + + /** + * 查找暗色连通区域 - 返回列表 + */ + private findDarkRegionsList( + binary: Uint8Array, + width: number, + height: number + ): BoundingBox[] { + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + + if (visited[idx] === 0 && binary[idx] === 1) { + const region = this.floodFill(binary, visited, x, y, width, height); + + if (region.width >= 20 && region.height >= 20) { + regions.push(region); + } + } + } + } + + return regions; + } + + /** + * 选择最佳候选区域 - 支持返回多个 + */ + private selectBestRegions( + regions: BoundingBox[], + imageWidth: number, + imageHeight: number, + selectMultiple: boolean = false + ): BoundingBox[] { + if (regions.length === 0) return []; + + // 过滤掉太大的区域 + const validRegions = regions.filter(r => + r.width < imageWidth * 0.5 && r.height < imageHeight * 0.5 + ); + + // 豆瓣滑块缺口特征: + // 1. 宽度 50-100 像素 + // 2. 高度 50-170 像素(放宽以适应不同形状) + // 3. 宽高比 0.5-1.8(允许一定变形) + // 4. 高像素密度(填充率) + + const candidates = validRegions.filter(region => { + const aspectRatio = region.width / region.height; + const centerY = region.y + region.height / 2; + + // 滑块特征(更严格的正方形要求): + // 1. 大小 70-110px (正方形拼图块) + // 2. 宽高比 0.85-1.18 (严格的正方形,只允许小幅度变形) + // 3. 位置在图片的合理范围内 + // 4. 宽度和高度差异不超过20px + const sizeDiff = Math.abs(region.width - region.height); + + return ( + region.width >= 70 && region.width <= 110 && + region.height >= 70 && region.height <= 110 && + aspectRatio >= 0.85 && aspectRatio <= 1.18 && + sizeDiff <= 20 && // 宽高差不超过20px,确保是正方形 + centerY > imageHeight * 0.10 && + centerY < imageHeight * 0.80 + ); + }); + + if (candidates.length === 0) return []; + + // 按照质量排序:优先选择接近正方形且密度高的 + candidates.sort((a, b) => { + const densityA = a.score / (a.width * a.height); + const densityB = b.score / (b.width * b.height); + const aspectScoreA = Math.abs(a.width / a.height - 1); + const aspectScoreB = Math.abs(b.width / b.height - 1); + + // 密度优先,然后是形状 + return (densityB * 3 - aspectScoreB) - (densityA * 3 - aspectScoreA); + }); + + // 返回前N个不重叠的候选 + const selected: BoundingBox[] = []; + for (const candidate of candidates) { + // 检查是否与已选择的重叠 + const overlaps = selected.some(s => { + const iou = this.calculateIoU(s, candidate); + return iou > 0.3; // IoU > 0.3 认为重叠 + }); + + if (!overlaps) { + selected.push(candidate); + if (!selectMultiple && selected.length >= 1) break; + if (selectMultiple && selected.length >= 3) break; // 最多返回3个 + } + } + + return selected; + } + + /** + * 计算IoU - 移到这里以便selectBestRegions使用 + */ + private calculateIoU(box1: BoundingBox, box2: BoundingBox): number { + const x1 = Math.max(box1.x, box2.x); + const y1 = Math.max(box1.y, box2.y); + const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); + const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); + + const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); + const box1Area = box1.width * box1.height; + const box2Area = box2.width * box2.height; + const unionArea = box1Area + box2Area - intersectionArea; + + return unionArea > 0 ? intersectionArea / unionArea : 0; + } + + /** + * 形态学闭运算 + */ + private morphologyClose( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number + ): Uint8Array { + const dilated = this.dilate(binary, width, height, kernelSize); + return this.erode(dilated, width, height, kernelSize); + } + + /** + * 膨胀操作 + */ + private dilate( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number + ): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let maxVal = 0; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + maxVal = Math.max(maxVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = maxVal; + } + } + + return result; + } + + /** + * 腐蚀操作 + */ + private erode( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number + ): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let minVal = 1; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + minVal = Math.min(minVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = minVal; + } + } + + return result; + } + + /** + * 查找暗色连通区域 + */ + private findDarkRegions( + binary: Uint8Array, + width: number, + height: number + ): BoundingBox[] { + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + + if (visited[idx] === 0 && binary[idx] === 1) { + const region = this.floodFill(binary, visited, x, y, width, height); + + if (region.width >= 20 && region.height >= 20) { + regions.push(region); + } + } + } + } + + return regions; + } + + /** + * 洪水填充算法 + */ + private floodFill( + binary: Uint8Array, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number + ): BoundingBox { + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + let pixelCount = 0; + + const stack: Array<[number, number]> = [[startX, startY]]; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = y * width + x; + if (visited[idx] === 1) continue; + if (binary[idx] !== 1) continue; + + visited[idx] = 1; + pixelCount++; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + stack.push([x + 1, y]); + stack.push([x - 1, y]); + stack.push([x, y + 1]); + stack.push([x, y - 1]); + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: pixelCount + }; + } + + /** + * 选择最佳候选区域 + */ + private selectBestRegion( + regions: BoundingBox[], + imageWidth: number, + imageHeight: number + ): BoundingBox | null { + if (regions.length === 0) return null; + + // 过滤掉整个图片大小的区域 + const validRegions = regions.filter(r => + r.width < imageWidth * 0.9 && r.height < imageHeight * 0.9 + ); + + if (validRegions.length === 0) return null; + + // 豆瓣滑块缺口特征: + // 1. 宽度 50-95 像素(放宽上限) + // 2. 高度 50-95 像素 + // 3. 宽高比接近 1(正方形缺口) + // 4. 高像素密度(填充率 > 0.60)- 降低阈值以适应对比度低的图片 + + const candidates = validRegions.filter(region => { + const aspectRatio = region.width / region.height; + const density = region.score / (region.width * region.height); + const centerY = region.y + region.height / 2; + + return ( + region.width >= 50 && region.width <= 95 && + region.height >= 50 && region.height <= 95 && + aspectRatio >= 0.85 && aspectRatio <= 1.18 && + centerY > imageHeight * 0.12 && + centerY < imageHeight * 0.78 && + density > 0.60 // 降低密度阈值 + ); + }); + + if (candidates.length > 0) { + // 优先选择密度最高且最接近正方形的 + candidates.sort((a, b) => { + const densityA = a.score / (a.width * a.height); + const densityB = b.score / (b.width * b.height); + const aspectScoreA = Math.abs(a.width / a.height - 1); + const aspectScoreB = Math.abs(b.width / b.height - 1); + + // 密度权重更高 + return (densityB * 2 + (1 - aspectScoreB)) - (densityA * 2 + (1 - aspectScoreA)); + }); + + return candidates[0]; + } + + // 放宽条件再试一次 + const relaxedCandidates = validRegions.filter(region => { + const aspectRatio = region.width / region.height; + const density = region.score / (region.width * region.height); + + return ( + region.width >= 45 && region.width <= 100 && + region.height >= 45 && region.height <= 100 && + aspectRatio >= 0.75 && aspectRatio <= 1.33 && + region.y < imageHeight * 0.82 && + region.y > imageHeight * 0.06 && + density > 0.45 // 进一步降低 + ); + }); + + if (relaxedCandidates.length === 0) return null; + + // 选择密度最高的 + relaxedCandidates.sort((a, b) => { + const densityA = a.score / (a.width * a.height); + const densityB = b.score / (b.width * b.height); + return densityB - densityA; + }); + + return relaxedCandidates[0]; + } + + /** + * 在图像上绘制边界框(支持多个) + */ + private async drawBoundingBoxes( + imagePath: string, + boxes: BoundingBox[], + outputPath: string, + color: 'red' | 'blue' | 'green' = 'blue' + ): Promise { + const colorMap = { + red: { r: 255, g: 0, b: 0 }, + blue: { r: 0, g: 0, b: 255 }, + green: { r: 0, g: 255, b: 0 } + }; + + const rgb = colorMap[color]; + const lineWidth = 3; + + const image = sharp(imagePath); + const metadata = await image.metadata(); + + if (!metadata.width || !metadata.height) { + throw new Error('Cannot get image dimensions'); + } + + // 创建SVG覆盖层绘制所有矩形框 + const rects = boxes.map(box => ` + `).join('\n'); + + const svg = ` + + ${rects} + + `; + + await image + .composite([{ + input: Buffer.from(svg), + top: 0, + left: 0 + }]) + .toFile(outputPath); + } + + /** + * 验证检测结果 + */ + async validateDetection( + imagePath: string, + targetImagePath: string + ): Promise<{ match: boolean; iou: number }> { + const targetBox = await this.extractRedBox(targetImagePath); + if (!targetBox) { + console.log(`No red box found in ${targetImagePath}`); + return { match: false, iou: 0 }; + } + + const detected = await this.detectSlider(imagePath, undefined, true); // 检测多个 + if (!detected) { + console.log(`No slider detected in ${imagePath}`); + return { match: false, iou: 0 }; + } + + const detectedBoxes = Array.isArray(detected) ? detected : [detected]; + + // 检查是否有任何一个检测框在红框内 + let bestIoU = 0; + let anyInside = false; + + for (const detectedBox of detectedBoxes) { + const isInside = this.isBoxInside(detectedBox, targetBox); + const iou = this.calculateIoU(detectedBox, targetBox); + + if (isInside) anyInside = true; + bestIoU = Math.max(bestIoU, iou); + } + + // 如果任何缺口在红框内,认为检测成功 + const match = anyInside; + + return { match, iou: bestIoU }; + } + + /** + * 检查box1是否在box2内部(或大部分重叠) + */ + private isBoxInside(box1: BoundingBox, box2: BoundingBox): boolean { + const centerX = box1.x + box1.width / 2; + const centerY = box1.y + box1.height / 2; + + // 检查中心点是否在box2内 + return ( + centerX >= box2.x && + centerX <= box2.x + box2.width && + centerY >= box2.y && + centerY <= box2.y + box2.height + ); + } + + /** + * 从标注图像中提取红色框 + */ + private async extractRedBox(imagePath: string): Promise { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + let minX = width; + let minY = height; + let maxX = 0; + let maxY = 0; + let foundRed = false; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + if (r > 200 && g < 100 && b < 100) { + foundRed = true; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + } + } + + if (!foundRed) return null; + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: 1.0 + }; + } +} + +async function main() { + const detector = new SliderDetector(); + const validator = new SliderValidator(); + const baseDir = path.join(__dirname, '..'); + + console.log('=== 开始滑块检测 ===\n'); + + // 1. 验证算法准确性(使用新的验证器) + console.log('1. 验证算法准确性(容差:10px)...\n'); + const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + + if (fs.existsSync(doubanTargetDir)) { + const targetFiles = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); + let totalMatched = 0; + let totalTargets = 0; + let totalDetected = 0; + + for (const file of targetFiles) { + const targetPath = path.join(doubanTargetDir, file); + const imagePath = path.join(doubanDir, file); + + if (!fs.existsSync(imagePath)) { + console.log(` 跳过 ${file} (原图不存在)`); + continue; + } + + // 提取标准答案(红框) + const targetBoxes = await validator.extractRedBoxes(targetPath); + + // 检测滑块(检测所有可能的滑块) + const detected = await detector.detectSlider(imagePath, undefined, true); + const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : []; + + // 转换为验证器的格式 + const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({ + x: b.x, + y: b.y, + width: b.width, + height: b.height + })); + + // 验证 + const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10); + + console.log(` ${file}:`); + console.log(` 目标: ${result.totalTargets}个, 检测: ${result.detectedCount}个, 匹配: ${result.matchedCount}个`); + console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%, 召回率: ${(result.recall * 100).toFixed(1)}%`); + + if (result.matches.length > 0) { + result.matches.forEach((m, i) => { + console.log(` 匹配${i + 1}: IoU=${m.iou.toFixed(3)}`); + }); + } + + if (result.matchedCount < result.totalTargets) { + console.log(` ⚠️ 漏检: ${result.totalTargets - result.matchedCount}个滑块`); + } + + if (result.unmatched.length > 0) { + console.log(` ⚠️ 误检: ${result.unmatched.length}个`); + } + + console.log(''); + + totalMatched += result.matchedCount; + totalTargets += result.totalTargets; + totalDetected += result.detectedCount; + } + + const overallPrecision = totalDetected > 0 ? (totalMatched / totalDetected * 100).toFixed(1) : '0.0'; + const overallRecall = totalTargets > 0 ? (totalMatched / totalTargets * 100).toFixed(1) : '0.0'; + + console.log(`总体统计:`); + console.log(` 总目标数: ${totalTargets}个`); + console.log(` 总检测数: ${totalDetected}个`); + console.log(` 成功匹配: ${totalMatched}个`); + console.log(` 准确率(Precision): ${overallPrecision}%`); + console.log(` 召回率(Recall): ${overallRecall}%\n`); + } + + // 2. 处理所有豆瓣图片并输出结果 + console.log('2. 处理豆瓣滑块图片...'); + const outputDir = path.join(baseDir, 'images', 'output'); + + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + if (fs.existsSync(doubanDir)) { + const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')); + let processedCount = 0; + + for (const file of files) { + const inputPath = path.join(doubanDir, file); + const outputPath = path.join(outputDir, file); + + const result = await detector.detectSlider(inputPath, outputPath, true); + + if (result && result.length > 0) { + console.log(` ✅ Processed ${file}, found ${result.length} sliders.`); + processedCount++; + } else { + console.log(` ❌ Processed ${file}, but no slider was found.`); + } + } + + if (files.length > 0) { + console.log(`\n Processed ${processedCount} out of ${files.length} images.`); + } + } +} + +main(); diff --git a/src/edge-detector.ts b/src/edge-detector.ts new file mode 100644 index 0000000..bd40d29 --- /dev/null +++ b/src/edge-detector.ts @@ -0,0 +1,392 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +interface BoundingBox { + x: number; + y: number; + width: number; + height: number; + score: number; +} + +export class EdgeDetector { + /** + * 检测滑块 - 基于边缘检测 + */ + async detectSlider( + imagePath: string, + outputPath?: string, + detectMultiple: boolean = false + ): Promise { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 1. 转换为灰度并计算梯度(边缘强度) + const edges = this.detectEdges(data, width, height, channels); + + // 2. 应用阈值获取强边缘 + const binary = this.thresholdEdges(edges, width, height); + + // 3. 形态学操作连接边缘 + const connected = this.morphologyClose(binary, width, height, 3); + + // 4. 查找连通区域 + const regions = this.findEdgeRegionsList(connected, width, height); + + // 5. 筛选候选 + const candidates = this.selectBestRegions(regions, width, height, true); + + if (candidates.length === 0) { + return detectMultiple ? [] : null; + } + + // 6. 如果需要输出可视化 + if (outputPath) { + await this.drawBoxes(imagePath, candidates, outputPath); + } + + return detectMultiple ? candidates : candidates[0]; + } + + /** + * Sobel边缘检测 + */ + private detectEdges( + data: Buffer, + width: number, + height: number, + channels: number + ): Float32Array { + const edges = new Float32Array(width * height); + + // Sobel算子 + const sobelX = [ + [-1, 0, 1], + [-2, 0, 2], + [-1, 0, 1] + ]; + + const sobelY = [ + [-1, -2, -1], + [0, 0, 0], + [1, 2, 1] + ]; + + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + let gx = 0; + let gy = 0; + + // 计算Sobel梯度 + for (let ky = -1; ky <= 1; ky++) { + for (let kx = -1; kx <= 1; kx++) { + const idx = ((y + ky) * width + (x + kx)) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + const brightness = r * 0.299 + g * 0.587 + b * 0.114; + + gx += brightness * sobelX[ky + 1][kx + 1]; + gy += brightness * sobelY[ky + 1][kx + 1]; + } + } + + const magnitude = Math.sqrt(gx * gx + gy * gy); + edges[y * width + x] = magnitude; + } + } + + return edges; + } + + /** + * 边缘二值化 + */ + private thresholdEdges( + edges: Float32Array, + width: number, + height: number + ): Uint8Array { + // 计算边缘强度的统计信息 + let max = 0; + for (let i = 0; i < edges.length; i++) { + max = Math.max(max, edges[i]); + } + + // 使用自适应阈值(最大值的20%) + const threshold = max * 0.15; + + const binary = new Uint8Array(width * height); + for (let i = 0; i < edges.length; i++) { + binary[i] = edges[i] > threshold ? 1 : 0; + } + + return binary; + } + + /** + * 形态学闭运算 + */ + private morphologyClose( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number + ): Uint8Array { + const dilated = this.dilate(binary, width, height, kernelSize); + return this.erode(dilated, width, height, kernelSize); + } + + /** + * 膨胀操作 + */ + private dilate( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number + ): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let maxVal = 0; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + maxVal = Math.max(maxVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = maxVal; + } + } + + return result; + } + + /** + * 腐蚀操作 + */ + private erode( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number + ): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let minVal = 1; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + minVal = Math.min(minVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = minVal; + } + } + + return result; + } + + /** + * 查找边缘连通区域 + */ + private findEdgeRegionsList( + binary: Uint8Array, + width: number, + height: number + ): BoundingBox[] { + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + + if (visited[idx] === 0 && binary[idx] === 1) { + const region = this.floodFill(binary, visited, x, y, width, height); + + if (region.width >= 30 && region.height >= 30) { + regions.push(region); + } + } + } + } + + return regions; + } + + /** + * 泛洪填充 + */ + private floodFill( + binary: Uint8Array, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number + ): BoundingBox { + const stack: Array<[number, number]> = [[startX, startY]]; + let pixelCount = 0; + let minX = width; + let minY = height; + let maxX = 0; + let maxY = 0; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = y * width + x; + if (visited[idx] === 1) continue; + if (binary[idx] !== 1) continue; + + visited[idx] = 1; + pixelCount++; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + stack.push([x + 1, y]); + stack.push([x - 1, y]); + stack.push([x, y + 1]); + stack.push([x, y - 1]); + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: pixelCount + }; + } + + /** + * 选择最佳候选区域 + */ + private selectBestRegions( + regions: BoundingBox[], + imageWidth: number, + imageHeight: number, + selectMultiple: boolean = false + ): BoundingBox[] { + if (regions.length === 0) return []; + + // 滑块特征(基于边缘): + // 1. 大小 70-110px (放宽范围) + // 2. 宽高比 0.7-1.4 (接近正方形) + // 3. 位置在图片的合理范围内 + // 4. 边缘密度适中(不会太sparse) + + const candidates = regions.filter(region => { + const aspectRatio = region.width / region.height; + const centerY = region.y + region.height / 2; + const edgeDensity = region.score / (region.width * region.height); + + return ( + region.width >= 70 && region.width <= 110 && + region.height >= 70 && region.height <= 110 && + aspectRatio >= 0.7 && aspectRatio <= 1.4 && + centerY > imageHeight * 0.15 && + centerY < imageHeight * 0.75 && + edgeDensity > 0.08 && edgeDensity < 0.45 // 边缘密度:不太稀疏,也不太密集 + ); + }); + + if (candidates.length === 0) return []; + + // 按质量排序 + candidates.sort((a, b) => { + const scoreA = Math.abs(a.width / a.height - 1) + Math.abs(a.width - 90) / 100; + const scoreB = Math.abs(b.width / b.height - 1) + Math.abs(b.width - 90) / 100; + return scoreA - scoreB; + }); + + // 返回不重叠的候选 + const selected: BoundingBox[] = []; + for (const candidate of candidates) { + const overlaps = selected.some(s => { + const iou = this.calculateIoU(s, candidate); + return iou > 0.3; + }); + + if (!overlaps) { + selected.push(candidate); + if (!selectMultiple && selected.length >= 1) break; + if (selectMultiple && selected.length >= 3) break; + } + } + + return selected; + } + + /** + * 计算IoU + */ + private calculateIoU(box1: BoundingBox, box2: BoundingBox): number { + const x1 = Math.max(box1.x, box2.x); + const y1 = Math.max(box1.y, box2.y); + const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); + const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); + + const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); + const box1Area = box1.width * box1.height; + const box2Area = box2.width * box2.height; + const unionArea = box1Area + box2Area - intersectionArea; + + return unionArea > 0 ? intersectionArea / unionArea : 0; + } + + /** + * 绘制检测框 + */ + private async drawBoxes( + imagePath: string, + boxes: BoundingBox[], + outputPath: string + ): Promise { + const image = sharp(imagePath); + const metadata = await image.metadata(); + const { width = 0, height = 0 } = metadata; + + const svgBoxes = boxes.map(box => + `` + ).join('\n'); + + const svg = ` + + ${svgBoxes} + + `; + + await image + .composite([{ input: Buffer.from(svg), top: 0, left: 0 }]) + .toFile(outputPath); + } +} diff --git a/src/extract-targets.ts b/src/extract-targets.ts new file mode 100644 index 0000000..132270e --- /dev/null +++ b/src/extract-targets.ts @@ -0,0 +1,179 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +interface BoundingBox { + x: number; + y: number; + width: number; + height: number; +} + +/** + * 从标注图像中提取所有红框(支持多个) + */ +async function extractAllRedBoxes(imagePath: string): Promise { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 创建红色像素的二值图 + const redMap = new Uint8Array(width * height); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + // 检测红色像素(高R值,低G和B值) + redMap[y * width + x] = (r > 200 && g < 100 && b < 100) ? 1 : 0; + } + } + + // 使用连通区域分析找到所有红框 + const visited = new Uint8Array(width * height); + const boxes: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + + if (visited[idx] === 0 && redMap[idx] === 1) { + // 找到一个新的红色区域 + let minX = x, minY = y, maxX = x, maxY = y; + const stack: Array<[number, number]> = [[x, y]]; + + while (stack.length > 0) { + const [cx, cy] = stack.pop()!; + if (cx < 0 || cx >= width || cy < 0 || cy >= height) continue; + + const cidx = cy * width + cx; + if (visited[cidx] === 1 || redMap[cidx] !== 1) continue; + + visited[cidx] = 1; + minX = Math.min(minX, cx); + minY = Math.min(minY, cy); + maxX = Math.max(maxX, cx); + maxY = Math.max(maxY, cy); + + stack.push([cx + 1, cy], [cx - 1, cy], [cx, cy + 1], [cx, cy - 1]); + } + + const boxWidth = maxX - minX + 1; + const boxHeight = maxY - minY + 1; + + // 过滤掉太小的噪点(红框应该足够大) + if (boxWidth > 50 && boxHeight > 30) { + boxes.push({ + x: minX, + y: minY, + width: boxWidth, + height: boxHeight + }); + } + } + } + } + + return boxes; +} + +/** + * 在红框内查找实际的滑块缺口(小的正方形区域) + */ +function findSlidersInRedBox(redBox: BoundingBox): BoundingBox[] { + // 红框通常标注的是一个横向区域,里面包含1-2个滑块缺口 + // 滑块缺口特征:50-90像素的正方形 + + const sliders: BoundingBox[] = []; + const expectedSliderSize = 60; // 预期滑块大小 + + // 如果红框宽度远大于高度,说明是横向区域,可能包含多个滑块 + if (redBox.width > redBox.height * 2) { + // 估算可能有几个滑块 + const possibleCount = Math.round(redBox.width / expectedSliderSize); + + if (possibleCount >= 2) { + // 可能有2个滑块,在红框的左右两侧 + sliders.push({ + x: redBox.x, + y: redBox.y, + width: Math.min(90, redBox.height), + height: redBox.height + }); + + sliders.push({ + x: redBox.x + redBox.width - Math.min(90, redBox.height), + y: redBox.y, + width: Math.min(90, redBox.height), + height: redBox.height + }); + } else { + // 只有1个滑块,使用红框高度作为大小 + sliders.push({ + x: redBox.x, + y: redBox.y, + width: Math.min(90, redBox.height), + height: redBox.height + }); + } + } else { + // 红框本身就接近正方形,直接使用 + sliders.push(redBox); + } + + return sliders; +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const targetDir = path.join(baseDir, 'images', 'douban-target'); + + if (!fs.existsSync(targetDir)) { + console.error('目录不存在:', targetDir); + return; + } + + const files = fs.readdirSync(targetDir).filter(f => f.endsWith('.png')); + + console.log('=== 提取红框标注信息 ===\n'); + + const groundTruth: Record = {}; + + for (const file of files) { + const imagePath = path.join(targetDir, file); + const redBoxes = await extractAllRedBoxes(imagePath); + + console.log(`${file}:`); + console.log(` 找到 ${redBoxes.length} 个红框标注`); + + const allSliders: BoundingBox[] = []; + + redBoxes.forEach((box, idx) => { + console.log(` 红框${idx + 1}: [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`); + + // 分析红框内的滑块 + const sliders = findSlidersInRedBox(box); + console.log(` -> 推测包含 ${sliders.length} 个滑块`); + + sliders.forEach((slider, sIdx) => { + console.log(` 滑块${sIdx + 1}: [x=${slider.x}, y=${slider.y}, w=${slider.width}, h=${slider.height}]`); + allSliders.push(slider); + }); + }); + + groundTruth[file] = allSliders; + console.log(''); + } + + // 保存标准答案到文件 + const outputPath = path.join(baseDir, 'ground-truth.json'); + fs.writeFileSync(outputPath, JSON.stringify(groundTruth, null, 2)); + console.log(`标准答案已保存到: ${outputPath}\n`); +} + +main().catch(console.error); diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..da32c4c --- /dev/null +++ b/src/index.ts @@ -0,0 +1,586 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +interface BoundingBox { + x: number; + y: number; + width: number; + height: number; + score: number; +} + +class SliderDetector { + /** + * 使用模板匹配来检测滑块位置 + */ + async detectSlider( + imagePath: string, + outputPath?: string + ): Promise { + try { + // 读取主图像 + const imageBuffer = await sharp(imagePath).ensureAlpha().raw().toBuffer({ resolveWithObject: true }); + const { data: imageData, info: imageInfo } = imageBuffer; + + // 使用边缘检测来找滑块 + const box = await this.findSliderByEdgeDetection(imagePath); + + // 如果需要输出结果 + if (outputPath && box) { + await this.drawBoundingBox(imagePath, box, outputPath, 'blue'); + } + + return box; + } catch (error) { + console.error(`Error detecting slider in ${imagePath}:`, error); + return null; + } + } + + /** + * 使用边缘检测和形状分析来找滑块 + */ + private async findSliderByEdgeDetection(imagePath: string): Promise { + // 读取原始图像数据 + const { data: rawData, info: rawInfo } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = rawInfo; + + // 方法1: 检测滑块缺口(暗色区域) + const gapBox = this.detectSliderGap(rawData, width, height, channels); + if (gapBox) { + return gapBox; + } + + // 方法2: 使用边缘检测 + const processed = await sharp(imagePath) + .greyscale() + .normalize() + .toBuffer(); + + // 应用边缘检测(使用锐化滤镜来增强边缘) + const edges = await sharp(processed) + .convolve({ + width: 3, + height: 3, + kernel: [-1, -1, -1, -1, 8, -1, -1, -1, -1] + }) + .toBuffer({ resolveWithObject: true }); + + const { data, info } = edges; + const edgeChannels = info.channels; + + // 二值化 + const threshold = 40; + const binary = new Uint8Array(data.length); + for (let i = 0; i < data.length; i += edgeChannels) { + const value = data[i] > threshold ? 255 : 0; + for (let j = 0; j < edgeChannels; j++) { + binary[i + j] = value; + } + } + + // 查找连通区域 + const regions = this.findConnectedRegions(binary, width, height, edgeChannels); + + // 过滤并找到最可能的滑块区域 + const sliderRegion = this.findSliderRegion(regions, width, height); + + return sliderRegion; + } + + /** + * 检测滑块缺口(豆瓣滑块通常在图片上有一个明显的缺口) + */ + private detectSliderGap( + data: Buffer, + width: number, + height: number, + channels: number + ): BoundingBox | null { + // 创建亮度图 + const brightness = new Float32Array(width * height); + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + brightness[y * width + x] = (r + g + b) / 3; + } + } + + // 计算垂直和水平投影来检测边界 + const verticalProj = new Float32Array(width); + const horizontalProj = new Float32Array(height); + + for (let x = 0; x < width; x++) { + let sum = 0; + for (let y = 0; y < height; y++) { + // 检测亮度变化(边缘) + if (y > 0) { + const diff = Math.abs(brightness[y * width + x] - brightness[(y - 1) * width + x]); + sum += diff; + } + } + verticalProj[x] = sum; + } + + for (let y = 0; y < height; y++) { + let sum = 0; + for (let x = 0; x < width; x++) { + if (x > 0) { + const diff = Math.abs(brightness[y * width + x] - brightness[y * width + (x - 1)]); + sum += diff; + } + } + horizontalProj[y] = sum; + } + + // 寻找投影中的峰值区域(滑块边界) + const sliderCandidates = this.findProjectionPeaks( + verticalProj, + horizontalProj, + width, + height + ); + + if (sliderCandidates.length > 0) { + return sliderCandidates[0]; + } + + return null; + } + + /** + * 从投影数据中找到峰值区域 + */ + private findProjectionPeaks( + verticalProj: Float32Array, + horizontalProj: Float32Array, + width: number, + height: number + ): BoundingBox[] { + const candidates: BoundingBox[] = []; + + // 计算阈值 + const vThreshold = this.calculateThreshold(verticalProj); + const hThreshold = this.calculateThreshold(horizontalProj); + + // 找垂直方向的峰值区域 + const vRegions: Array<[number, number]> = []; + let inRegion = false; + let start = 0; + + for (let x = 0; x < width; x++) { + if (verticalProj[x] > vThreshold && !inRegion) { + start = x; + inRegion = true; + } else if (verticalProj[x] <= vThreshold && inRegion) { + if (x - start >= 30 && x - start <= 100) { + vRegions.push([start, x]); + } + inRegion = false; + } + } + + // 找水平方向的峰值区域 + const hRegions: Array<[number, number]> = []; + inRegion = false; + start = 0; + + for (let y = 0; y < height; y++) { + if (horizontalProj[y] > hThreshold && !inRegion) { + start = y; + inRegion = true; + } else if (horizontalProj[y] <= hThreshold && inRegion) { + if (y - start >= 30 && y - start <= 100) { + hRegions.push([start, y]); + } + inRegion = false; + } + } + + // 组合垂直和水平区域形成候选框 + for (const [x1, x2] of vRegions) { + for (const [y1, y2] of hRegions) { + const w = x2 - x1; + const h = y2 - y1; + const aspectRatio = w / h; + + // 滑块通常是正方形或接近正方形 + if (aspectRatio >= 0.6 && aspectRatio <= 1.7) { + candidates.push({ + x: x1, + y: y1, + width: w, + height: h, + score: 1.0 + }); + } + } + } + + return candidates; + } + + /** + * 计算自适应阈值 + */ + private calculateThreshold(values: Float32Array): number { + let sum = 0; + for (let i = 0; i < values.length; i++) { + sum += values[i]; + } + const mean = sum / values.length; + + // 使用平均值的1.5倍作为阈值 + return mean * 1.5; + } + + /** + * 查找连通区域 + */ + private findConnectedRegions( + binary: Uint8Array, + width: number, + height: number, + channels: number + ): BoundingBox[] { + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + const pixelIdx = idx * channels; + + if (visited[idx] === 0 && binary[pixelIdx] === 255) { + const region = this.floodFill(binary, visited, x, y, width, height, channels); + if (region.width > 10 && region.height > 10) { // 过滤太小的区域 + regions.push(region); + } + } + } + } + + return regions; + } + + /** + * 洪水填充算法查找连通区域 + */ + private floodFill( + binary: Uint8Array, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number, + channels: number + ): BoundingBox { + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + + const stack: Array<[number, number]> = [[startX, startY]]; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = y * width + x; + if (visited[idx] === 1) continue; + + const pixelIdx = idx * channels; + if (binary[pixelIdx] !== 255) continue; + + visited[idx] = 1; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + stack.push([x + 1, y]); + stack.push([x - 1, y]); + stack.push([x, y + 1]); + stack.push([x, y - 1]); + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: 1.0 + }; + } + + /** + * 从所有区域中找到最可能的滑块区域 + */ + private findSliderRegion( + regions: BoundingBox[], + imageWidth: number, + imageHeight: number + ): BoundingBox | null { + if (regions.length === 0) return null; + + // 滑块通常的特征: + // 1. 宽度在30-80像素之间 + // 2. 高度在30-80像素之间 + // 3. 宽高比接近1(正方形) + // 4. 位于图像上半部分 + + const candidates = regions.filter(region => { + const aspectRatio = region.width / region.height; + return ( + region.width >= 30 && region.width <= 100 && + region.height >= 30 && region.height <= 100 && + aspectRatio >= 0.5 && aspectRatio <= 2.0 && + region.y < imageHeight * 0.7 // 在图像上部70%的区域内 + ); + }); + + if (candidates.length === 0) return null; + + // 选择最方正的区域(宽高比最接近1) + candidates.sort((a, b) => { + const ratioA = Math.abs(a.width / a.height - 1); + const ratioB = Math.abs(b.width / b.height - 1); + return ratioA - ratioB; + }); + + const best = candidates[0]; + + // 扩展边界框以包含完整滑块(增加一些边距) + const padding = 5; + return { + x: Math.max(0, best.x - padding), + y: Math.max(0, best.y - padding), + width: Math.min(imageWidth - best.x + padding, best.width + padding * 2), + height: Math.min(imageHeight - best.y + padding, best.height + padding * 2), + score: best.score + }; + } + + /** + * 在图像上绘制边界框 + */ + private async drawBoundingBox( + imagePath: string, + box: BoundingBox, + outputPath: string, + color: 'red' | 'blue' | 'green' = 'blue' + ): Promise { + const colorMap = { + red: { r: 255, g: 0, b: 0 }, + blue: { r: 0, g: 0, b: 255 }, + green: { r: 0, g: 255, b: 0 } + }; + + const rgb = colorMap[color]; + const lineWidth = 2; + + // 读取原始图像 + const image = sharp(imagePath); + const metadata = await image.metadata(); + + if (!metadata.width || !metadata.height) { + throw new Error('Cannot get image dimensions'); + } + + // 创建SVG覆盖层绘制矩形框 + const svg = ` + + + + `; + + await image + .composite([{ + input: Buffer.from(svg), + top: 0, + left: 0 + }]) + .toFile(outputPath); + } + + /** + * 验证检测结果与人工标注的匹配度 + */ + async validateDetection( + imagePath: string, + targetImagePath: string + ): Promise<{ match: boolean; iou: number }> { + // 从人工标注图像中提取红色框的位置 + const targetBox = await this.extractRedBox(targetImagePath); + if (!targetBox) { + console.log(`No red box found in ${targetImagePath}`); + return { match: false, iou: 0 }; + } + + // 检测滑块位置 + const detectedBox = await this.detectSlider(imagePath); + if (!detectedBox) { + console.log(`No slider detected in ${imagePath}`); + return { match: false, iou: 0 }; + } + + // 计算IoU (Intersection over Union) + const iou = this.calculateIoU(detectedBox, targetBox); + const match = iou > 0.5; // IoU > 0.5 认为匹配成功 + + return { match, iou }; + } + + /** + * 从标注图像中提取红色框 + */ + private async extractRedBox(imagePath: string): Promise { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 查找红色像素 + let minX = width; + let minY = height; + let maxX = 0; + let maxY = 0; + let foundRed = false; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + // 检测红色像素 (高R值,低G和B值) + if (r > 200 && g < 100 && b < 100) { + foundRed = true; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + } + } + + if (!foundRed) return null; + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: 1.0 + }; + } + + /** + * 计算两个边界框的IoU + */ + private calculateIoU(box1: BoundingBox, box2: BoundingBox): number { + const x1 = Math.max(box1.x, box2.x); + const y1 = Math.max(box1.y, box2.y); + const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); + const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); + + const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); + const box1Area = box1.width * box1.height; + const box2Area = box2.width * box2.height; + const unionArea = box1Area + box2Area - intersectionArea; + + return intersectionArea / unionArea; + } +} + +async function main() { + const detector = new SliderDetector(); + const baseDir = path.join(__dirname, '..'); + + console.log('=== 开始滑块检测 ===\n'); + + // 1. 验证算法准确性(使用douban-target中的标注图片) + console.log('1. 验证算法准确性...'); + const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + + if (fs.existsSync(doubanTargetDir)) { + const targetFiles = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); + let successCount = 0; + let totalIoU = 0; + + for (const file of targetFiles) { + const targetPath = path.join(doubanTargetDir, file); + const imagePath = path.join(doubanDir, file); + + if (!fs.existsSync(imagePath)) { + console.log(` 跳过 ${file} (原图不存在)`); + continue; + } + + const result = await detector.validateDetection(imagePath, targetPath); + console.log(` ${file}: IoU = ${result.iou.toFixed(3)}, 匹配 = ${result.match ? '✓' : '✗'}`); + + if (result.match) successCount++; + totalIoU += result.iou; + } + + const accuracy = targetFiles.length > 0 ? (successCount / targetFiles.length * 100).toFixed(1) : 0; + const avgIoU = targetFiles.length > 0 ? (totalIoU / targetFiles.length).toFixed(3) : 0; + console.log(`\n 准确率: ${successCount}/${targetFiles.length} (${accuracy}%)`); + console.log(` 平均IoU: ${avgIoU}\n`); + } + + // 2. 处理所有豆瓣图片并输出结果 + console.log('2. 处理豆瓣滑块图片...'); + const outputDir = path.join(baseDir, 'images', 'output'); + + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + if (fs.existsSync(doubanDir)) { + const files = fs.readdirSync(doubanDir).filter(f => f.endsWith('.png')); + let processedCount = 0; + + for (const file of files) { + const inputPath = path.join(doubanDir, file); + const outputPath = path.join(outputDir, file); + + const box = await detector.detectSlider(inputPath, outputPath); + + if (box) { + console.log(` ✓ ${file}: 检测到滑块 [x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`); + processedCount++; + } else { + console.log(` ✗ ${file}: 未检测到滑块`); + } + } + + console.log(`\n 处理完成: ${processedCount}/${files.length} 张图片`); + console.log(` 输出目录: ${outputDir}\n`); + } + + console.log('=== 检测完成 ==='); +} + +main().catch(console.error); diff --git a/src/test-cv.ts b/src/test-cv.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/test-edge.ts b/src/test-edge.ts new file mode 100644 index 0000000..c80a386 --- /dev/null +++ b/src/test-edge.ts @@ -0,0 +1,78 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { EdgeSliderDetector } from './detector-edge'; +import { SliderValidator, BoundingBox as ValidatorBox } from './validator'; + +async function main() { + const detector = new EdgeSliderDetector(); + const validator = new SliderValidator(); + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + const doubanTargetDir = path.join(baseDir, 'images', 'douban-target'); + const outputDir = path.join(baseDir, 'images', 'output-canny'); + + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + console.log('=== 测试Canny边缘检测方法 ===\n'); + + const files = fs.readdirSync(doubanTargetDir).filter(f => f.endsWith('.png')); + let totalMatched = 0; + let totalTargets = 0; + let totalDetected = 0; + + for (const file of files) { + const imagePath = path.join(doubanDir, file); + const targetPath = path.join(doubanTargetDir, file); + const outputPath = path.join(outputDir, file); + + if (!fs.existsSync(imagePath)) continue; + + // 获取标准答案 + const targetBoxes = await validator.extractRedBoxes(targetPath); + + // 检测滑块 + const detected = await detector.detectSlider(imagePath, outputPath, true); + const detectedBoxes = detected ? (Array.isArray(detected) ? detected : [detected]) : []; + + // 转换格式 + const detectedValidatorBoxes: ValidatorBox[] = detectedBoxes.map(b => ({ + x: b.x, + y: b.y, + width: b.width, + height: b.height + })); + + // 验证 + const result = await validator.validateDetection(detectedValidatorBoxes, targetBoxes, 10); + + totalMatched += result.matchedCount; + totalTargets += result.totalTargets; + totalDetected += result.detectedCount; + + console.log(`${file}:`); + console.log(` 目标: ${result.totalTargets}, 检测: ${result.detectedCount}, 匹配: ${result.matchedCount}`); + console.log(` 准确率: ${(result.precision * 100).toFixed(1)}%, 召回率: ${(result.recall * 100).toFixed(1)}%`); + + if (result.matchedCount < result.totalTargets) { + console.log(` ⚠️ 漏检: ${result.totalTargets - result.matchedCount}个`); + } + if (result.unmatched.length > 0) { + console.log(` ⚠️ 误检: ${result.unmatched.length}个`); + } + } + + const overallPrecision = totalDetected > 0 ? (totalMatched / totalDetected * 100).toFixed(1) : '0.0'; + const overallRecall = totalTargets > 0 ? (totalMatched / totalTargets * 100).toFixed(1) : '0.0'; + + console.log(`\n总体统计:`); + console.log(` 总目标数: ${totalTargets}个`); + console.log(` 总检测数: ${totalDetected}个`); + console.log(` 成功匹配: ${totalMatched}个`); + console.log(` 准确率(Precision): ${overallPrecision}%`); + console.log(` 召回率(Recall): ${overallRecall}%`); + console.log(`\n输出目录: ${outputDir}`); +} + +main().catch(console.error); diff --git a/src/validator.ts b/src/validator.ts new file mode 100644 index 0000000..44fc8de --- /dev/null +++ b/src/validator.ts @@ -0,0 +1,213 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +interface BoundingBox { + x: number; + y: number; + width: number; + height: number; +} + +class SliderValidator { + /** + * 从标注图像中提取所有红框 + */ + async extractRedBoxes(imagePath: string): Promise { + const { data, info } = await sharp(imagePath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + // 创建红色像素地图 + const redMap = new Uint8Array(width * height); + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + // 检测红色像素 + redMap[y * width + x] = (r > 200 && g < 100 && b < 100) ? 1 : 0; + } + } + + // 使用连通区域分析找到所有红框 + const visited = new Uint8Array(width * height); + const boxes: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0 && redMap[idx] === 1) { + const box = this.floodFill(redMap, visited, x, y, width, height); + if (box.width > 10 && box.height > 10) { + boxes.push(box); + } + } + } + } + + return boxes; + } + + /** + * 洪水填充算法 + */ + private floodFill( + binary: Uint8Array, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number + ): BoundingBox { + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + + const stack: Array<[number, number]> = [[startX, startY]]; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = y * width + x; + if (visited[idx] === 1) continue; + if (binary[idx] !== 1) continue; + + visited[idx] = 1; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + stack.push([x + 1, y]); + stack.push([x - 1, y]); + stack.push([x, y + 1]); + stack.push([x, y - 1]); + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1 + }; + } + + /** + * 检查两个框是否匹配(允许一定偏差) + */ + isBoxMatching(detected: BoundingBox, target: BoundingBox, tolerance: number = 10): boolean { + // 计算中心点 + const detectedCenterX = detected.x + detected.width / 2; + const detectedCenterY = detected.y + detected.height / 2; + const targetCenterX = target.x + target.width / 2; + const targetCenterY = target.y + target.height / 2; + + // 中心点距离 + const centerDistance = Math.sqrt( + Math.pow(detectedCenterX - targetCenterX, 2) + + Math.pow(detectedCenterY - targetCenterY, 2) + ); + + // 尺寸差异 - 允许更大的容差,因为形态学操作可能改变大小 + const widthDiff = Math.abs(detected.width - target.width); + const heightDiff = Math.abs(detected.height - target.height); + + // 如果中心点距离小于容差,且尺寸差异不太大,认为匹配 + // 放宽尺寸容差到30px(考虑到形态学操作的影响) + return centerDistance <= tolerance && widthDiff <= 30 && heightDiff <= 30; + } + + /** + * 计算IoU(交并比) + */ + calculateIoU(box1: BoundingBox, box2: BoundingBox): number { + const x1 = Math.max(box1.x, box2.x); + const y1 = Math.max(box1.y, box2.y); + const x2 = Math.min(box1.x + box1.width, box2.x + box2.width); + const y2 = Math.min(box1.y + box1.height, box2.y + box2.height); + + const intersectionArea = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); + const box1Area = box1.width * box1.height; + const box2Area = box2.width * box2.height; + const unionArea = box1Area + box2Area - intersectionArea; + + return intersectionArea / unionArea; + } + + /** + * 验证检测结果 + */ + async validateDetection( + detectedBoxes: BoundingBox[], + targetBoxes: BoundingBox[], + tolerance: number = 10 + ): Promise<{ + totalTargets: number; + detectedCount: number; + matchedCount: number; + precision: number; + recall: number; + matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }>; + unmatched: BoundingBox[]; + }> { + const matches: Array<{ detected: BoundingBox; target: BoundingBox; iou: number }> = []; + const matchedTargets = new Set(); + const matchedDetected = new Set(); + + // 为每个检测框找到最佳匹配的目标框 + for (let i = 0; i < detectedBoxes.length; i++) { + const detected = detectedBoxes[i]; + let bestMatch = -1; + let bestIoU = 0; + + for (let j = 0; j < targetBoxes.length; j++) { + if (matchedTargets.has(j)) continue; + + if (this.isBoxMatching(detected, targetBoxes[j], tolerance)) { + const iou = this.calculateIoU(detected, targetBoxes[j]); + if (iou > bestIoU) { + bestIoU = iou; + bestMatch = j; + } + } + } + + if (bestMatch >= 0) { + matches.push({ + detected, + target: targetBoxes[bestMatch], + iou: bestIoU + }); + matchedTargets.add(bestMatch); + matchedDetected.add(i); + } + } + + // 未匹配的检测框 + const unmatched = detectedBoxes.filter((_, i) => !matchedDetected.has(i)); + + const precision = detectedBoxes.length > 0 ? matches.length / detectedBoxes.length : 0; + const recall = targetBoxes.length > 0 ? matches.length / targetBoxes.length : 0; + + return { + totalTargets: targetBoxes.length, + detectedCount: detectedBoxes.length, + matchedCount: matches.length, + precision, + recall, + matches, + unmatched + }; + } +} + +export { SliderValidator, BoundingBox }; diff --git a/src/visualize.ts b/src/visualize.ts new file mode 100644 index 0000000..d6e121e --- /dev/null +++ b/src/visualize.ts @@ -0,0 +1,98 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; + +async function visualizeRedBox(imagePath: string, targetPath: string, outputPath: string) { + // 从target图像提取红框 + const { data, info } = await sharp(targetPath) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height, channels } = info; + + let minX = width, minY = height, maxX = 0, maxY = 0; + let foundRed = false; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = (y * width + x) * channels; + const r = data[idx]; + const g = data[idx + 1]; + const b = data[idx + 2]; + + if (r > 200 && g < 100 && b < 100) { + foundRed = true; + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + } + } + } + + if (!foundRed) { + console.log('No red box found'); + return; + } + + // 在原图上绘制红框 + const image = sharp(imagePath); + const metadata = await image.metadata(); + + const boxWidth = maxX - minX + 1; + const boxHeight = maxY - minY + 1; + + const svg = ` + + + + ${boxWidth}x${boxHeight} + + + `; + + await image + .composite([{ + input: Buffer.from(svg), + top: 0, + left: 0 + }]) + .toFile(outputPath); + + console.log(`Saved: ${path.basename(outputPath)}`); +} + +async function main() { + const baseDir = path.join(__dirname, '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + const targetDir = path.join(baseDir, 'images', 'douban-target'); + const visualDir = path.join(baseDir, 'images', 'visual'); + + if (!fs.existsSync(visualDir)) { + fs.mkdirSync(visualDir, { recursive: true }); + } + + const files = fs.readdirSync(targetDir).filter(f => f.endsWith('.png')).slice(0, 9); + + for (const file of files) { + const imagePath = path.join(doubanDir, file); + const targetPath = path.join(targetDir, file); + const outputPath = path.join(visualDir, file); + + if (fs.existsSync(imagePath)) { + await visualizeRedBox(imagePath, targetPath, outputPath); + } + } + + console.log(`\nVisualization saved to: ${visualDir}`); +} + +main().catch(console.error); diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..f9e7017 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "moduleResolution": "node", + "outDir": "dist", + "rootDir": "src", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": true, + "skipLibCheck": true + }, + "include": ["src"] +}