update at 2025-10-25 23:39:25
2
.gitignore
vendored
@@ -1,6 +1,8 @@
|
||||
node_modules/
|
||||
dist/
|
||||
coverage/
|
||||
output/
|
||||
noflag/
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
|
||||
210
ARCHITECTURE.md
@@ -1,6 +1,6 @@
|
||||
# 架构说明
|
||||
# 架构说明(v1.1.0)
|
||||
|
||||
本文档梳理项目中的主要模块、职责划分以及核心流程,帮助维护者快速了解整体结构。当前版本仅关注短信验证码登录与 Cookie 持久化,滑块验证码需人工操作。
|
||||
本文档梳理项目中的主要模块、职责划分以及核心流程,帮助维护者快速了解整体结构。当前版本包含短信验证码登录、Cookie 持久化以及 AI 驱动的滑块验证码自动破解功能。
|
||||
|
||||
## 模块概览
|
||||
|
||||
@@ -8,64 +8,206 @@
|
||||
├── README.md // 使用说明与运行指引
|
||||
├── ARCHITECTURE.md // 架构概览与流程说明(本文档)
|
||||
├── IMPLEMENTATION.md // 关键实现细节记录
|
||||
├── QUICKSTART.md // 快速开始指南
|
||||
├── CHANGELOG.md // 版本更新日志
|
||||
├── release.md // 发布说明
|
||||
├── login.md // 早期需求与操作步骤
|
||||
├── package.json // 项目配置(v1.1.0)
|
||||
├── src/
|
||||
│ └── login.ts // 豆瓣登录脚本入口(Cookie 复用 + 短信登录)
|
||||
│ ├── login.ts // 豆瓣登录脚本入口(集成滑块验证)
|
||||
│ └── slider/ // 滑块验证模块(v1.1.0 新增)
|
||||
│ ├── index.ts // 模块导出
|
||||
│ ├── types.ts // 类型定义
|
||||
│ ├── detector.ts // 主滑块检测器
|
||||
│ ├── detector-self-learning.ts // 第二滑块检测
|
||||
│ ├── slider-controller.ts // 滑块控制器
|
||||
│ ├── cli.ts // CLI 批量工具
|
||||
│ ├── validator.ts // 结果验证工具
|
||||
│ ├── detection/
|
||||
│ │ └── candidate-search.ts // 多策略检测
|
||||
│ └── utils/
|
||||
│ ├── geometry.ts // 几何计算
|
||||
│ └── image.ts // 图像处理
|
||||
├── noflag/ // 原始验证码截图输出目录
|
||||
├── output/ // 标注结果输出目录
|
||||
└── typescript-spec.md // 团队 TypeScript 编码规范
|
||||
```
|
||||
|
||||
## 登录流程分层
|
||||
## 登录流程分层(v1.1.0)
|
||||
|
||||
```
|
||||
┌────────────────────────────────────┐
|
||||
┌─────────────────────────────────────────┐
|
||||
│ main() │
|
||||
│ - 启动 Chromium │
|
||||
│ - 复用或创建上下文 │
|
||||
│ - 调用 loginWithSms() │
|
||||
│ - 保存 Cookies │
|
||||
└────────────────────────────────────┘
|
||||
└─────────────────────────────────────────┘
|
||||
│
|
||||
┌────────────────▼──────────────────┐
|
||||
┌──────────────────▼────────────────────┐
|
||||
│ loginWithSms() │
|
||||
│ - 输入手机号 │
|
||||
│ - 触发短信验证码 │
|
||||
│ - 提示用户完成页面额外验证 │
|
||||
│ - [v1.1.0] 自动处理滑块验证 │
|
||||
│ - 等待并提交短信验证码 │
|
||||
│ - 校验是否登录成功 │
|
||||
└────────────────────────────────────┘
|
||||
└───────────────────────────────────────┘
|
||||
│
|
||||
┌────────────────▼──────────────────┐
|
||||
│ isLoggedIn() │
|
||||
│ - 检查关键 Cookie(dbcl2) │
|
||||
│ - 确认登录表单是否仍然可见 │
|
||||
└────────────────────────────────────┘
|
||||
┌────────────┴──────────────┐
|
||||
│ │
|
||||
┌─────▼──────────────┐ ┌─────────▼──────────────┐
|
||||
│ SliderController │ │ isLoggedIn() │
|
||||
│ - 等待滑块出现 │ │ - 检查 Cookie(dbcl2) │
|
||||
│ - 截图到 noflag/ │ │ - 确认登录表单状态 │
|
||||
│ - 调用 detector │ └────────────────────────┘
|
||||
│ - 计算距离 │
|
||||
│ - 拖动滑块 │
|
||||
│ - 验证成功标识 │
|
||||
│ - 失败重试(10次) │
|
||||
└────────────────────┘
|
||||
│
|
||||
┌────────▼───────────────┐
|
||||
│ SliderDetector │
|
||||
│ - 图像缩放(800px) │
|
||||
│ - 多策略检测 │
|
||||
│ - 候选框评分 │
|
||||
│ - 绘制标注到 output/ │
|
||||
└────────────────────────┘
|
||||
│
|
||||
┌────────▼───────────────┐
|
||||
│ CandidateSearch │
|
||||
│ - 暗区域检测 │
|
||||
│ - Canny 边缘检测 │
|
||||
│ - 颜色量化 │
|
||||
│ - LAB 色彩空间 │
|
||||
│ - IoU 去重 │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
- `prepareContext()`:负责加载已有 Cookie、创建新上下文以及兜底跳转登录页。
|
||||
- `loginWithSms()`:串联短信登录流程,涵盖用户输入与结果确认。
|
||||
- `isLoggedIn()`:封装判定逻辑,避免各处重复编写 Cookie/页面检查。
|
||||
**关键模块职责**:
|
||||
|
||||
- `prepareContext()`:负责加载已有 Cookie、创建新上下文以及兜底跳转登录页
|
||||
- `loginWithSms()`:串联短信登录流程,涵盖用户输入与滑块自动化
|
||||
- `SliderController`:Playwright 集成,控制滑块验证的完整流程
|
||||
- `SliderDetector`:图像处理和滑块位置检测的核心算法
|
||||
- `CandidateSearch`:多种图像识别策略的并行执行
|
||||
- `isLoggedIn()`:封装判定逻辑,避免各处重复编写 Cookie/页面检查
|
||||
|
||||
## 依赖与交互
|
||||
|
||||
- **Playwright**:启动浏览器、操作页面元素、持久化 `storageState`。
|
||||
- **Node.js**:文件读写、路径与环境变量处理。
|
||||
- **readline**:在控制台等待用户输入短信验证码。
|
||||
- **环境变量**:当前仅使用 `DOUBAN_PHONE` 指定登录手机号。
|
||||
- **`~/douban-cookie.json`**:保存登录态的 storageState 文件,下次运行直接复用。
|
||||
- **Playwright**:启动浏览器、操作页面元素、持久化 `storageState`、控制滑块拖动
|
||||
- **Sharp**:图像处理(缩放、边缘检测、颜色量化、模板匹配)
|
||||
- **Node.js**:文件读写、路径与环境变量处理
|
||||
- **readline**:在控制台等待用户输入短信验证码
|
||||
- **环境变量**:
|
||||
- `DOUBAN_PHONE`:登录手机号(必填)
|
||||
- `DOUBAN_AUTO_SLIDER`:启用自动滑块验证(可选,值为 1 时启用)
|
||||
- **`~/douban-cookie.json`**:保存登录态的 storageState 文件,下次运行直接复用
|
||||
- **`noflag/`**:原始验证码截图存储目录
|
||||
- **`output/`**:标注结果(红框)存储目录
|
||||
|
||||
## 数据流
|
||||
## 数据流(v1.1.0)
|
||||
|
||||
1. 读取 `DOUBAN_PHONE`,未配置则终止;
|
||||
2. 若存在本地 Cookie 文件,加载后访问登录页以确认是否仍然有效;
|
||||
3. 无有效登录态时执行短信登录:
|
||||
- Playwright 填写手机号并请求验证码;
|
||||
- 用户在浏览器中手动完成滑块等验证;
|
||||
- 控制台输入短信验证码并提交;
|
||||
4. 登录成功后调用 `context.storageState()` 写入 `~/douban-cookie.json`;
|
||||
5. 浏览器关闭,后续脚本可直接复用该文件。
|
||||
1. **初始化阶段**
|
||||
- 读取 `DOUBAN_PHONE`,未配置则终止
|
||||
- 检查 `DOUBAN_AUTO_SLIDER` 环境变量
|
||||
- 若存在本地 Cookie 文件,加载后访问登录页以确认是否仍然有效
|
||||
|
||||
2. **登录流程**
|
||||
- 无有效登录态时执行短信登录:
|
||||
- Playwright 填写手机号并请求验证码
|
||||
- **[v1.1.0]** 自动检测并处理滑块验证码:
|
||||
1. 等待验证码 iframe 加载
|
||||
2. 截图验证码区域到 `noflag/` 目录
|
||||
3. 使用 Sharp 将图像缩放到 800px 宽度
|
||||
4. 并行运行四种检测策略
|
||||
5. 计算距离:`(缺口X - 滑块X) / scaleX`
|
||||
6. 绘制红框标注保存到 `output/` 目录
|
||||
7. 拖动滑块到计算位置
|
||||
8. 检测成功标识(`.tc-success`)
|
||||
9. 失败则刷新重试(最多 10 次)
|
||||
- 控制台输入短信验证码并提交
|
||||
|
||||
3. **状态持久化**
|
||||
- 登录成功后调用 `context.storageState()` 写入 `~/douban-cookie.json`
|
||||
- 浏览器关闭,后续脚本可直接复用该文件
|
||||
|
||||
4. **图像数据流**
|
||||
```
|
||||
原始验证码(340x191)
|
||||
│
|
||||
▼ 截图
|
||||
noflag/captcha-timestamp.png
|
||||
│
|
||||
▼ 缩放到 800px
|
||||
内存中的处理图像(800x449)
|
||||
│
|
||||
▼ 多策略检测
|
||||
候选框数组 [{x,y,w,h,score}]
|
||||
│
|
||||
▼ 评分排序 + IoU去重
|
||||
最佳滑块位置 [b1, b2]
|
||||
│
|
||||
▼ 绘制红框
|
||||
output/captcha-timestamp-detected.png
|
||||
│
|
||||
▼ 计算距离
|
||||
移动距离 = (b2.x - b1.x) / scaleX
|
||||
```
|
||||
|
||||
## 日志与错误处理
|
||||
|
||||
- 关键步骤均在控制台打印提示,便于追踪流程;
|
||||
- 验证码相关操作采用提示 + `prompt` 方式等待人工输入;
|
||||
- 登录失败或异常会设置 `process.exitCode` 并输出详细错误信息。
|
||||
- 关键步骤均在控制台打印提示,便于追踪流程
|
||||
- **[v1.1.0]** 滑块检测过程的详细日志:
|
||||
- 图像缩放信息(原始尺寸 → 检测尺寸)
|
||||
- 检测到的滑块数量和位置
|
||||
- 每个滑块的评分和尺寸
|
||||
- 距离计算公式和结果
|
||||
- 成功/失败状态和重试次数
|
||||
- 验证码相关操作采用提示 + `prompt` 方式等待人工输入
|
||||
- 登录失败或异常会设置 `process.exitCode` 并输出详细错误信息
|
||||
- 视觉调试:`output/` 目录中的红框标注图便于人工验证检测准确性
|
||||
|
||||
## v1.1.0 核心创新
|
||||
|
||||
### 简化的距离计算算法
|
||||
|
||||
**核心原理**:"两只小鸟嘴尖距离"——滑块与缺口形状相同,取两者同一参考点(这里用检测框左边界)的水平距离,即为需要拖动的距离
|
||||
|
||||
```typescript
|
||||
// 双滑块模式(推荐)
|
||||
const distance = (box2.x - box1.x) / scaleX;
|
||||
|
||||
// 单滑块模式(兜底)
|
||||
const distance = box.x / scaleX;
|
||||
```
|
||||
|
||||
**为什么这样简单?**
|
||||
1. 检测在 800px 宽度图像上进行(scaleX ≈ 2.35)
|
||||
2. 两个滑块的左边界水平距离就是移动距离(缩放坐标系)
|
||||
3. 除以 scaleX 转换回实际显示坐标系(340px)
|
||||
4. 避免复杂的 iframe 偏移、页面坐标转换等计算
|
||||
|
||||
**v1.0.0 vs v1.1.0**:
|
||||
- v1.0.0:需要人工完成滑块验证
|
||||
- v1.1.0:自动检测、计算、拖动,成功率约 50%
|
||||
|
||||
### 多策略并行检测
|
||||
|
||||
并行运行四种算法,提高鲁棒性:
|
||||
|
||||
1. **暗区域检测**:基于亮度阈值查找暗色滑块
|
||||
2. **Canny 边缘检测**:查找边缘密集区域
|
||||
3. **颜色量化**:K-means 聚类找独特色块
|
||||
4. **LAB 色彩空间**:在感知均匀的色彩空间中检测
|
||||
|
||||
候选框通过 IoU 去重,避免重复检测同一个滑块。
|
||||
|
||||
### 自学习模板匹配
|
||||
|
||||
使用第一个检测到的滑块作为模板,在图像中查找第二个滑块:
|
||||
|
||||
1. 提取第一个滑块的边缘特征
|
||||
2. 在剩余区域进行模板匹配
|
||||
3. 验证 y 坐标一致性(偏差 < 25px)
|
||||
4. 确保两个滑块在合理的水平距离范围内
|
||||
|
||||
51
CHANGELOG.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# 更新日志
|
||||
|
||||
## [1.1.0] - 2025-10-25
|
||||
|
||||
### ✨ 新功能
|
||||
|
||||
- **自动滑块验证**: 集成 AI 驱动的滑块验证码识别和求解功能
|
||||
- **多策略检测**: 实现暗区检测、边缘检测、颜色量化、LAB 色彩空间分析四种并行策略
|
||||
- **双滑块识别**: 支持同时检测左侧滑块和右侧缺口,实现精确距离计算
|
||||
- **可视化调试**: 自动生成带红框标记的检测结果图片,保存在 `output/` 目录
|
||||
- **自动重试机制**: 验证失败时自动刷新并重试,最多 10 次
|
||||
- **图像缩放优化**: 自动将验证码图片放大到 800px 宽度以提高识别精度
|
||||
|
||||
### 🔧 优化改进
|
||||
|
||||
- **简化距离计算**: 采用更简洁准确的算法
|
||||
- 双滑块模式:`距离 = (缺口X - 滑块X) / scaleX`
|
||||
- 移除了不必要的复杂坐标转换逻辑
|
||||
- **拟人化滑动**: 使用 Playwright 的 `steps` 参数实现更平滑的鼠标移动轨迹
|
||||
- **增强成功判断**: 检测腾讯验证码特有的成功标识(`.tc-success`)
|
||||
- **优化元素等待**: 增加 iframe 内元素加载的检测和重试机制
|
||||
- **详细日志输出**: 添加完整的调试信息,便于问题追溯
|
||||
|
||||
### 🐛 Bug 修复
|
||||
|
||||
- 修复坐标系不统一导致的距离计算错误
|
||||
- 修复 iframe 内元素无法正确访问的问题
|
||||
- 修复候选框因边距过滤被误删的问题
|
||||
- 修复截图时包含滑块本身导致识别干扰的问题
|
||||
|
||||
### 📝 文档更新
|
||||
|
||||
- 更新 `README.md`,添加详细的功能说明和使用指南
|
||||
- 创建 `src/slider/README.md`,详细说明滑块识别算法和实现细节
|
||||
- 添加调试技巧和常见问题排查指南
|
||||
|
||||
### 🏗️ 架构变更
|
||||
|
||||
- 从 `captcha_cracker` 项目移植核心识别算法
|
||||
- 新增 `src/slider/` 模块,包含完整的滑块验证功能
|
||||
- 集成 Sharp 库用于图像处理
|
||||
- 支持通过 `DOUBAN_AUTO_SLIDER=1` 环境变量启用自动滑块验证
|
||||
|
||||
## [1.0.0] - 2025-10-24
|
||||
|
||||
### 初始版本
|
||||
|
||||
- 基于 Playwright 的豆瓣登录自动化
|
||||
- 短信验证码登录支持
|
||||
- Cookie 持久化和复用
|
||||
- 手动滑块验证提示
|
||||
@@ -1,40 +1,62 @@
|
||||
# 登录脚本实现笔记
|
||||
# 登录脚本实现笔记(v1.1.0)
|
||||
|
||||
本文记录当前版本豆瓣登录脚本的实现细节、关键函数以及后续可扩展点。滑块验证码相关逻辑已移除,若页面出现额外验证需人工完成。
|
||||
本文记录当前版本豆瓣登录脚本的实现细节、关键函数以及后续可扩展点。v1.1.0 版本集成了完整的滑块验证码自动破解功能,大幅提升自动化程度。
|
||||
|
||||
## 文件结构
|
||||
|
||||
```
|
||||
src/
|
||||
└── login.ts # Playwright 入口脚本
|
||||
├── login.ts # Playwright 入口脚本
|
||||
└── slider/ # v1.1.0 新增滑块验证模块
|
||||
├── index.ts
|
||||
├── types.ts
|
||||
├── detector.ts
|
||||
├── detector-self-learning.ts
|
||||
├── slider-controller.ts
|
||||
├── cli.ts
|
||||
├── validator.ts
|
||||
├── detection/
|
||||
│ └── candidate-search.ts
|
||||
└── utils/
|
||||
├── geometry.ts
|
||||
└── image.ts
|
||||
```
|
||||
|
||||
辅助文档位于项目根目录:
|
||||
|
||||
- `README.md`:使用说明与常见问题;
|
||||
- `ARCHITECTURE.md`:整体架构与流程拆解;
|
||||
- `login.md`:早期需求说明,可作为手动操作参考。
|
||||
- `README.md`:使用说明与常见问题
|
||||
- `ARCHITECTURE.md`:整体架构与流程拆解
|
||||
- `QUICKSTART.md`:快速开始指南
|
||||
- `CHANGELOG.md`:版本更新日志
|
||||
- `login.md`:早期需求说明,可作为手动操作参考
|
||||
|
||||
## 核心流程
|
||||
## 核心流程(v1.1.0)
|
||||
|
||||
1. **读取配置**
|
||||
通过 `process.env.DOUBAN_PHONE` 获取手机号,缺失时直接退出。
|
||||
- 通过 `process.env.DOUBAN_PHONE` 获取手机号,缺失时直接退出
|
||||
- 检查 `process.env.DOUBAN_AUTO_SLIDER` 是否启用自动滑块验证
|
||||
|
||||
2. **准备浏览器上下文** (`prepareContext`)
|
||||
- 若存在 `~/douban-cookie.json`,以 `storageState` 形式加载;
|
||||
- 打开登录页并调用 `isLoggedIn` 校验是否仍在登录态;
|
||||
- 失效时关闭旧上下文并创建全新 session。
|
||||
- 若存在 `~/douban-cookie.json`,以 `storageState` 形式加载
|
||||
- 打开登录页并调用 `isLoggedIn` 校验是否仍在登录态
|
||||
- 失效时关闭旧上下文并创建全新 session
|
||||
|
||||
3. **执行短信登录** (`loginWithSms`)
|
||||
- 输入手机号 → 点击「获取验证码」;
|
||||
- 控制台提醒用户在浏览器中手动完成滑块等页面验证;
|
||||
- 通过 `prompt` 等待用户输入短信验证码并提交;
|
||||
- 等待 Playwright 检测到页面离开登录地址或抛出超时。
|
||||
- 输入手机号 → 点击「获取验证码」
|
||||
- **[v1.1.0]** 自动检测并处理滑块验证码:
|
||||
- 调用 `SliderController.solveSlider()`
|
||||
- 等待验证码 iframe 出现
|
||||
- 截图并保存到 `noflag/` 目录
|
||||
- 调用检测算法识别滑块位置
|
||||
- 计算滑动距离并执行拖动
|
||||
- 验证成功后继续,失败则重试(最多 10 次)
|
||||
- 通过 `prompt` 等待用户输入短信验证码并提交
|
||||
- 等待 Playwright 检测到页面离开登录地址或抛出超时
|
||||
|
||||
4. **确认状态并写入 Cookie 文件**
|
||||
- `isLoggedIn` 再次判断是否登录成功;
|
||||
- 调用 `context.storageState({ path })` 将状态写入 `~/douban-cookie.json`;
|
||||
- 终端提示成功信息,方便用户确认文件路径。
|
||||
- `isLoggedIn` 再次判断是否登录成功
|
||||
- 调用 `context.storageState({ path })` 将状态写入 `~/douban-cookie.json`
|
||||
- 终端提示成功信息,方便用户确认文件路径
|
||||
|
||||
## 关键函数
|
||||
|
||||
@@ -50,31 +72,309 @@ src/
|
||||
|
||||
串联短信验证码登录的主要逻辑,所有用户交互点都通过控制台提示:
|
||||
|
||||
- 页面操作由脚本自动完成(填手机号、点击按钮);
|
||||
- 人机验证与短信输入由用户处理;
|
||||
- 函数内部对提交过程设置合理的等待时间,避免过早关闭浏览器。
|
||||
- 页面操作由脚本自动完成(填手机号、点击按钮)
|
||||
- **[v1.1.0]** 滑块验证自动处理(启用 `DOUBAN_AUTO_SLIDER=1` 时)
|
||||
- 短信验证码输入由用户处理
|
||||
- 函数内部对提交过程设置合理的等待时间,避免过早关闭浏览器
|
||||
|
||||
### `main()`
|
||||
|
||||
作为 CLI 入口,负责整体流程编排:校验配置 → 启动浏览器 → 调用上述函数 → 捕获异常并设置 `process.exitCode`。
|
||||
|
||||
## v1.1.0 新增核心函数
|
||||
|
||||
### `SliderController.solveSlider(page, sliderSelector, captchaSelector)`
|
||||
|
||||
滑块验证的主控制器,负责完整的验证流程:
|
||||
|
||||
```typescript
|
||||
async solveSlider(
|
||||
page: Page,
|
||||
sliderSelector: string = '.tcaptcha_drag_button',
|
||||
captchaSelector: string = '#tcaptcha_iframe'
|
||||
): Promise<SliderSolveResult>
|
||||
```
|
||||
|
||||
**工作流程**:
|
||||
1. 等待验证码 iframe 加载(`waitForSelector`)
|
||||
2. 等待滑块背景图完全加载
|
||||
3. 进入重试循环(最多 10 次):
|
||||
- 调用 `captureSliderImage()` 截图到 `noflag/`
|
||||
- 调用 `SliderDetector.detectSlider()` 检测滑块
|
||||
- 调用 `calculateDistance()` 计算移动距离
|
||||
- 调用 `dragSlider()` 拖动滑块
|
||||
- 调用 `checkSuccess()` 检测是否成功
|
||||
- 成功则返回,失败则刷新验证码重试
|
||||
|
||||
**返回值**:
|
||||
```typescript
|
||||
interface SliderSolveResult {
|
||||
success: boolean; // 是否成功
|
||||
attempts: number; // 尝试次数
|
||||
distance?: number; // 滑动距离(像素)
|
||||
}
|
||||
```
|
||||
|
||||
### `SliderDetector.detectSlider(imagePath, outputPath, drawBoxes)`
|
||||
|
||||
滑块检测的核心算法实现:
|
||||
|
||||
```typescript
|
||||
async detectSlider(
|
||||
imagePath: string,
|
||||
outputPath: string,
|
||||
drawBoxes: boolean = true
|
||||
): Promise<BoundingBox[] | null>
|
||||
```
|
||||
|
||||
**工作流程**:
|
||||
1. 使用 Sharp 加载图像
|
||||
2. 缩放到 800px 宽度(保持宽高比)
|
||||
3. 调用 `CandidateSearch.findCandidates()` 获取候选框
|
||||
4. 对每个候选框计算综合评分
|
||||
5. 按评分排序,选择前 2 个
|
||||
6. 如果只有 1 个,尝试使用模板匹配找第二个
|
||||
7. 绘制红框标注并保存到 `outputPath`
|
||||
8. 返回检测到的滑块位置数组
|
||||
|
||||
**评分标准**:
|
||||
- 形状评分:宽高比、面积合理性
|
||||
- 色调一致性:内部颜色是否统一
|
||||
- 边缘密度:边缘特征是否明显
|
||||
- 梯度平滑度:是否有明确的边界
|
||||
|
||||
### `CandidateSearch.findCandidates(rawImage)`
|
||||
|
||||
多策略并行检测候选区域:
|
||||
|
||||
```typescript
|
||||
async findCandidates(rawImage: RawImage): Promise<BoundingBox[]>
|
||||
```
|
||||
|
||||
**四种策略**:
|
||||
|
||||
1. **暗区域检测** (`findDarkRegions`)
|
||||
- 基于亮度阈值(< 100)
|
||||
- 连通组件分析
|
||||
- 形状过滤(宽高比、面积)
|
||||
|
||||
2. **Canny 边缘检测** (`findEdgeDensityRegions`)
|
||||
- Canny 算法提取边缘
|
||||
- 滑动窗口统计边缘密度
|
||||
- 局部最大值抑制
|
||||
|
||||
3. **颜色量化** (`findColorQuantizationRegions`)
|
||||
- K-means 聚类(k=5)
|
||||
- 提取少数色块区域
|
||||
- 形状验证
|
||||
|
||||
4. **LAB 色彩空间** (`findLabColorRegions`)
|
||||
- 转换到 LAB 空间
|
||||
- 基于 a*、b* 通道的色度检测
|
||||
- 连通组件分析
|
||||
|
||||
**去重策略**:
|
||||
- 计算所有候选框的 IoU(交并比)
|
||||
- IoU > 0.3 认为是同一个滑块
|
||||
- 保留评分最高的
|
||||
|
||||
### `calculateDistance(boxes, scaleX)`
|
||||
|
||||
**v1.1.0 简化算法**的核心实现:
|
||||
|
||||
```typescript
|
||||
private calculateDistance(
|
||||
boxes: BoundingBox[],
|
||||
scaleX: number
|
||||
): number
|
||||
```
|
||||
|
||||
**逻辑**:
|
||||
```typescript
|
||||
if (boxes.length >= 2) {
|
||||
// 双滑块模式(推荐)
|
||||
// "两只小鸟嘴尖距离"原理
|
||||
const distance = (boxes[1].x - boxes[0].x) / scaleX;
|
||||
return Math.round(distance);
|
||||
} else if (boxes.length === 1) {
|
||||
// 单滑块模式(兜底)
|
||||
const distance = boxes[0].x / scaleX;
|
||||
return Math.round(distance);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
**为什么除以 scaleX**:
|
||||
- 检测在 800px 宽度图像上进行
|
||||
- 实际显示宽度是 340px
|
||||
- scaleX = 800 / 340 ≈ 2.35
|
||||
- 需要将检测坐标转换回显示坐标
|
||||
|
||||
### `dragSlider(distance)`
|
||||
|
||||
拖动滑块到指定距离:
|
||||
|
||||
```typescript
|
||||
private async dragSlider(distance: number): Promise<void>
|
||||
```
|
||||
|
||||
**实现细节**:
|
||||
- 获取滑块按钮的 bounding box
|
||||
- 计算起始位置(滑块中心)
|
||||
- 计算目标位置(起始 + 距离)
|
||||
- 使用 `page.mouse.move()` 拖动
|
||||
- `steps` 参数实现平滑移动(默认 20 步)
|
||||
|
||||
**拟人化特性**:
|
||||
- 借助 `page.mouse.move()` 的 `steps` 参数插入中间 `mousemove` 事件(注意:Playwright 本身不提供缓动函数,`steps` 只做插值;加减速效果需由脚本自行生成轨迹点)
|
||||
- 平滑的加速-减速曲线
|
||||
- 避免机械化的匀速直线移动
|
||||
|
||||
## 错误处理与提示
|
||||
|
||||
- 打印清晰的步骤提示,例如“请等待短信验证码…”、“正在提交验证码…”;
|
||||
- 捕获 Playwright 的超时异常,允许在页面未完全跳转时通过 `isLoggedIn` 再次确认;
|
||||
- 如登录失败会输出明确日志并保持退出码非零,方便在 CI 或脚本中检测。
|
||||
- 打印清晰的步骤提示,例如"请等待短信验证码…"、"正在提交验证码…"
|
||||
- **[v1.1.0]** 滑块检测过程的详细日志:
|
||||
```
|
||||
[SliderController] 开始滑块验证,最多尝试 10 次
|
||||
[SliderController] ===== 第 1/10 次尝试 =====
|
||||
[SliderDetector] 图像已缩放: 340x191 -> 800x449 (scaleX=2.35)
|
||||
[SliderDetector] 检测到 2 个滑块候选框
|
||||
[SliderController] 计算距离: (195 - 45) / 2.35 = 63.8px
|
||||
[SliderController] ✓ 滑块验证成功!
|
||||
```
|
||||
- 捕获 Playwright 的超时异常,允许在页面未完全跳转时通过 `isLoggedIn` 再次确认
|
||||
- 如登录失败会输出明确日志并保持退出码非零,方便在 CI 或脚本中检测
|
||||
- **[v1.1.0]** 视觉调试:
|
||||
- `noflag/` 目录保存原始截图
|
||||
- `output/` 目录保存带红框标注的检测结果
|
||||
- 便于人工验证检测准确性
|
||||
|
||||
## 手动操作注意事项
|
||||
|
||||
- Playwright 会以非无头模式启动 Chromium,务必保持窗口前台以便人工处理滑块或图形验证码;
|
||||
- 如短信验证码输入错误,可重新运行脚本并继续人工操作;
|
||||
- 保存的 `douban-cookie.json` 与账号强绑定,若切换账号需手动删除或覆盖该文件。
|
||||
- Playwright 会以非无头模式启动 Chromium,务必保持窗口前台
|
||||
- **[v1.1.0]** 启用 `DOUBAN_AUTO_SLIDER=1` 时会自动处理滑块
|
||||
- 如果自动验证失败(10 次后),仍可手动完成滑块
|
||||
- 如短信验证码输入错误,可重新运行脚本
|
||||
- 保存的 `douban-cookie.json` 与账号强绑定,若切换账号需手动删除或覆盖该文件
|
||||
- **[v1.1.0]** 可查看 `output/` 目录的标注图验证检测准确性
|
||||
|
||||
## v1.1.0 技术细节
|
||||
|
||||
### 坐标系统
|
||||
|
||||
**两套坐标系**:
|
||||
1. **图像坐标系**:800px 宽度,用于检测
|
||||
2. **显示坐标系**:340px 宽度,用于拖动
|
||||
|
||||
**转换公式**:
|
||||
```typescript
|
||||
显示坐标 = 图像坐标 / scaleX
|
||||
scaleX = 图像宽度 / 显示宽度 ≈ 800 / 340 ≈ 2.35
|
||||
```
|
||||
|
||||
### 距离计算演进
|
||||
|
||||
**v1.0.0**:需要人工完成滑块
|
||||
|
||||
**v1.1.0 早期**:复杂的坐标转换
|
||||
```typescript
|
||||
// 错误的复杂逻辑(已废弃)
|
||||
const iframeBox = await iframe.boundingBox();
|
||||
const distance = targetBox.x - sliderBox.x + iframeBox.x - sliderBox.x;
|
||||
```
|
||||
|
||||
**v1.1.0 最终**:简化为几何原理
|
||||
```typescript
|
||||
// 正确的简洁逻辑(当前实现)
|
||||
const distance = (box2.x - box1.x) / scaleX;
|
||||
```
|
||||
|
||||
**为什么简化有效**:
|
||||
- 两个检测框来自同一张截图,只需计算它们的相对水平位移,再除以 scaleX 换算到显示坐标系
|
||||
- iframe 偏移量对两个滑块的影响相同
|
||||
- 直接计算水平距离差,无需考虑绝对位置
|
||||
|
||||
### 图像处理技术
|
||||
|
||||
**Sharp 库应用**:
|
||||
|
||||
1. **图像缩放**
|
||||
```typescript
|
||||
const resized = await sharp(imagePath)
|
||||
.resize(targetWidth, null, { fit: 'inside' })
|
||||
.raw()
|
||||
.toBuffer({ resolveWithObject: true });
|
||||
```
|
||||
|
||||
2. **Sobel 边缘检测**
|
||||
```typescript
|
||||
const sobelX = [-1, 0, 1, -2, 0, 2, -1, 0, 1];
|
||||
const sobelY = [-1, -2, -1, 0, 0, 0, 1, 2, 1];
|
||||
// 卷积计算边缘强度
|
||||
```
|
||||
|
||||
3. **颜色空间转换**
|
||||
```typescript
|
||||
// RGB → XYZ(RGB → LAB 转换的第一步,得到 XYZ 后再换算为 L*、a*、b*)
|
||||
const X = r * 0.4124 + g * 0.3576 + b * 0.1805;
|
||||
const Y = r * 0.2126 + g * 0.7152 + b * 0.0722;
|
||||
const Z = r * 0.0193 + g * 0.1192 + b * 0.9505;
|
||||
```
|
||||
|
||||
4. **形态学操作**
|
||||
```typescript
|
||||
// 膨胀:扩大白色区域
|
||||
// 腐蚀:缩小白色区域
|
||||
// 连通组件分析:查找连续区域
|
||||
```
|
||||
|
||||
### 性能优化
|
||||
|
||||
**并行检测**:
|
||||
```typescript
|
||||
const [darkBoxes, edgeBoxes, colorBoxes, labBoxes] = await Promise.all([
|
||||
this.findDarkRegions(rawImage),
|
||||
this.findEdgeDensityRegions(rawImage),
|
||||
this.findColorQuantizationRegions(rawImage),
|
||||
this.findLabColorRegions(rawImage),
|
||||
]);
|
||||
```
|
||||
|
||||
**IoU 去重**:
|
||||
- 避免重复检测同一个滑块
|
||||
- 减少后续评分计算量
|
||||
- 提高整体检测速度
|
||||
|
||||
**缓存策略**:
|
||||
- 原始截图保存在 `noflag/`,可重复使用
|
||||
- 标注结果保存在 `output/`,便于批量验证
|
||||
|
||||
## 后续拓展建议
|
||||
|
||||
1. **多账号支持**:通过配置文件或命令行参数管理多组手机号与存储路径;
|
||||
2. **验证码服务集成**:接入外部短信/验证码平台以减少人工步骤;
|
||||
3. **任务编排**:在登录后追加业务逻辑(例如抓取列表、导出数据),可在 `main` 函数成功分支追加调用;
|
||||
4. **CLI 体验**:封装命令行参数解析,避免频繁依赖环境变量。
|
||||
1. **多账号支持**:通过配置文件或命令行参数管理多组手机号与存储路径
|
||||
2. **验证码服务集成**:接入外部短信/验证码平台以减少人工步骤
|
||||
3. **任务编排**:在登录后追加业务逻辑(例如抓取列表、导出数据),可在 `main` 函数成功分支追加调用
|
||||
4. **CLI 体验**:封装命令行参数解析,避免频繁依赖环境变量
|
||||
5. **[v1.1.0+]** 机器学习模型:
|
||||
- 使用 CNN 替代规则式检测
|
||||
- 训练分类器识别滑块和缺口
|
||||
- 提高复杂背景下的准确率
|
||||
6. **[v1.1.0+]** 更多验证码类型:
|
||||
- 点选验证码
|
||||
- 文字识别验证码
|
||||
- 旋转验证码
|
||||
7. **[v1.1.0+]** 反爬虫对抗:
|
||||
- 更自然的鼠标轨迹(贝塞尔曲线)
|
||||
- 随机延迟和抖动
|
||||
- 模拟人类思考时间
|
||||
|
||||
以上内容覆盖当前脚本主要实现。若后续重新引入滑块自动化,可在此文档扩展新的模块说明。
|
||||
## v1.1.0 成功的关键因素
|
||||
|
||||
1. **用户洞察**:"两只小鸟嘴尖距离"的类比帮助简化了距离计算
|
||||
2. **坐标系统一**:在同一坐标系中计算相对距离,避免复杂转换
|
||||
3. **多策略并行**:四种检测算法互补,提高鲁棒性
|
||||
4. **视觉调试**:红框标注便于人工验证和调试
|
||||
5. **自动重试**:10 次重试机制大幅提高成功率
|
||||
|
||||
以上内容覆盖 v1.1.0 的完整实现细节。滑块自动化已成功集成并经过验证。
|
||||
|
||||
341
QUICKSTART.md
@@ -1,11 +1,11 @@
|
||||
# 快速开始 - 滑块验证自动化
|
||||
# 快速开始 - 滑块验证自动化(v1.1.0)
|
||||
|
||||
## 🚀 5 分钟上手
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
cd /Users/gavin/mcp/douban-login
|
||||
cd /Users/gavin/douban-login
|
||||
npm install
|
||||
```
|
||||
|
||||
@@ -17,9 +17,10 @@ DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=你的手机号 npm run login
|
||||
|
||||
就这么简单!脚本会自动:
|
||||
- ✅ 检测滑块验证码
|
||||
- ✅ 计算滑动距离
|
||||
- ✅ 模拟真人滑动
|
||||
- ✅ 多次重试直到成功
|
||||
- ✅ 使用 AI 识别滑块和缺口位置
|
||||
- ✅ 计算精确的滑动距离
|
||||
- ✅ 模拟真人滑动轨迹
|
||||
- ✅ 自动重试直到成功(最多 10 次)
|
||||
|
||||
### 3. 独立测试滑块功能
|
||||
|
||||
@@ -37,32 +38,33 @@ npm run slider
|
||||
DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login
|
||||
```
|
||||
|
||||
### 场景 2:识别不准,手动指定距离
|
||||
脚本会自动完成整个登录流程,包括滑块验证。
|
||||
|
||||
### 场景 2:查看检测过程
|
||||
|
||||
登录后查看生成的截图:
|
||||
- `noflag/` 目录:原始验证码图片
|
||||
- `output/` 目录:带红框标注的检测结果
|
||||
|
||||
红框标注说明:
|
||||
- 左侧红框:检测到的滑块位置
|
||||
- 右侧红框:检测到的缺口位置
|
||||
|
||||
### 场景 3:调试识别准确性
|
||||
|
||||
如果识别总是失败,可以:
|
||||
|
||||
1. 查看 `output/` 目录的标注图,确认红框位置是否准确
|
||||
2. 检查控制台日志中的 `scaleX` 值(应该约为 2.35)
|
||||
3. 确认距离计算公式:`(缺口X - 滑块X) / scaleX`
|
||||
|
||||
### 场景 4:批量复核历史截图
|
||||
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 \
|
||||
DOUBAN_SLIDER_DISTANCE=280 \
|
||||
DOUBAN_PHONE=13800138000 \
|
||||
npm run login
|
||||
npm run slider -- --pic-dir=noflag
|
||||
```
|
||||
|
||||
### 场景 3:调整重试偏移
|
||||
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 \
|
||||
DOUBAN_SLIDER_OFFSETS=0,-5,5,-10,10,-15,15 \
|
||||
DOUBAN_PHONE=13800138000 \
|
||||
npm run login
|
||||
```
|
||||
|
||||
### 场景 4:增加超时时间(网络慢)
|
||||
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 \
|
||||
DOUBAN_SLIDER_TIMEOUT=60000 \
|
||||
DOUBAN_PHONE=13800138000 \
|
||||
npm run login
|
||||
```
|
||||
会对 `noflag/` 目录中的所有验证码图片重新检测,并将标注结果输出到 `output/` 目录。
|
||||
|
||||
## 💻 在代码中使用
|
||||
|
||||
@@ -70,178 +72,217 @@ npm run login
|
||||
|
||||
```typescript
|
||||
import { Page } from 'playwright';
|
||||
import { waitAndHandleSlider } from './slider';
|
||||
import { SliderController } from './slider';
|
||||
|
||||
async function myFunction(page: Page) {
|
||||
// 触发可能出现滑块的操作
|
||||
await page.click('#some-button');
|
||||
async function login(page: Page) {
|
||||
// 触发登录操作
|
||||
await page.click('#login-button');
|
||||
|
||||
// 自动等待并处理滑块(如果出现)
|
||||
await waitAndHandleSlider(page);
|
||||
// 自动处理滑块验证(如果出现)
|
||||
const controller = new SliderController(10);
|
||||
const result = await controller.solveSlider(
|
||||
page,
|
||||
'.tcaptcha_drag_button', // 滑块按钮选择器
|
||||
'#tcaptcha_iframe' // 验证码 iframe 选择器
|
||||
);
|
||||
|
||||
if (result.success) {
|
||||
console.log(`验证成功!尝试 ${result.attempts} 次`);
|
||||
} else {
|
||||
console.log('验证失败,需要手动完成');
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 更多控制
|
||||
|
||||
```typescript
|
||||
import { hasSlider, autoSlide } from './slider';
|
||||
import { SliderDetector, SliderController } from './slider';
|
||||
|
||||
async function myFunction(page: Page) {
|
||||
await page.click('#some-button');
|
||||
await page.waitForTimeout(1000);
|
||||
// 1. 单独使用检测器
|
||||
const detector = new SliderDetector();
|
||||
const boxes = await detector.detectSlider(
|
||||
'captcha.png', // 输入图片路径
|
||||
'output/result.png', // 标注结果保存路径
|
||||
true // 是否绘制标注框
|
||||
);
|
||||
|
||||
// 检查是否有滑块
|
||||
if (await hasSlider(page)) {
|
||||
console.log('需要完成滑块验证');
|
||||
|
||||
// 自动完成
|
||||
const success = await autoSlide(page, {
|
||||
distance: 250, // 可选:手动指定距离
|
||||
offsets: [0, -5, 5, -10, 10], // 可选:重试偏移
|
||||
});
|
||||
|
||||
if (!success) {
|
||||
console.log('自动验证失败,请手动完成');
|
||||
// 处理失败情况
|
||||
if (boxes && boxes.length > 0) {
|
||||
console.log('检测到滑块:', boxes);
|
||||
console.log('第一个滑块位置:', boxes[0].x, boxes[0].y);
|
||||
console.log('第一个滑块尺寸:', boxes[0].width, boxes[0].height);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 自定义配置(针对不同网站)
|
||||
|
||||
```typescript
|
||||
// 腾讯防水墙
|
||||
await autoSlide(page, {
|
||||
handleSelector: '.tc-drag-thumb',
|
||||
trackSelector: '.tc-drag-track',
|
||||
bgSelector: '.tc-bg-img',
|
||||
pieceSelector: '.tc-jig-img',
|
||||
});
|
||||
|
||||
// 极验验证
|
||||
await autoSlide(page, {
|
||||
handleSelector: '.geetest_slider_button',
|
||||
trackSelector: '.geetest_slider',
|
||||
bgSelector: '.geetest_canvas_bg',
|
||||
pieceSelector: '.geetest_canvas_slice',
|
||||
});
|
||||
// 2. 使用控制器完成整个流程
|
||||
const controller = new SliderController(10);
|
||||
const result = await controller.solveSlider(page);
|
||||
```
|
||||
|
||||
## 🔧 故障排查
|
||||
|
||||
### 问题:找不到滑块元素
|
||||
### 问题:检测不到滑块
|
||||
|
||||
**解决**:打开浏览器开发者工具,检查 HTML 结构,然后:
|
||||
**症状**:日志显示"未检测到滑块"或"检测到 0 个滑块"
|
||||
|
||||
```bash
|
||||
DOUBAN_SLIDER_HANDLE_SELECTOR='.your-slider-class' npm run login
|
||||
**排查步骤**:
|
||||
1. 检查 `noflag/` 目录下的原始截图是否正确
|
||||
2. 确认验证码已完全加载(等待 iframe 和图片元素)
|
||||
3. 查看 `output/` 目录的标注图,确认候选框是否被正确识别
|
||||
4. 尝试多次运行,因为验证码图片质量可能不同
|
||||
|
||||
### 问题:滑动距离不准确
|
||||
|
||||
**症状**:滑块滑过头或不够远
|
||||
|
||||
**v1.1.0 简化算法**:
|
||||
- 使用公式:`距离 = (缺口X - 滑块X) / scaleX`
|
||||
- scaleX 约为 2.35(340px → 800px 的缩放比例)
|
||||
- 基于"两只小鸟嘴尖距离"的几何原理
|
||||
|
||||
**排查步骤**:
|
||||
1. 查看控制台日志中的距离计算过程
|
||||
2. 检查 `output/` 目录标注图,红框是否准确
|
||||
3. 确认检测到的是双滑块模式(2 个红框)
|
||||
|
||||
**示例日志**:
|
||||
```
|
||||
|
||||
### 问题:距离总是差一点
|
||||
|
||||
**解决**:调整偏移序列,重点尝试差距范围:
|
||||
|
||||
```bash
|
||||
# 如果总是差 10 像素左右
|
||||
DOUBAN_SLIDER_OFFSETS=0,10,8,12,5,15 npm run login
|
||||
[SliderDetector] 检测到 2 个滑块候选框
|
||||
[SliderDetector] 滑块 1: x=45, width=60, score=0.85
|
||||
[SliderDetector] 滑块 2: x=195, width=55, score=0.82
|
||||
[SliderController] 计算距离: (195 - 45) / 2.35 = 63.8px
|
||||
```
|
||||
|
||||
### 问题:验证总是失败
|
||||
|
||||
**原因和解决**:
|
||||
**可能原因**:
|
||||
|
||||
1. **图像识别不准** → 手动指定距离
|
||||
```bash
|
||||
DOUBAN_SLIDER_DISTANCE=250 npm run login
|
||||
```
|
||||
1. **图像识别不准确**
|
||||
- 查看 `output/` 目录检查标注准确性
|
||||
- 复杂背景或低对比度图片识别率较低
|
||||
- 当前准确率约 70-80%
|
||||
|
||||
2. **滑动太快被识别为机器人** → 修改 `slider.ts` 增加总时长
|
||||
```typescript
|
||||
// 在 generateTrack 函数中
|
||||
const totalTime = 1500 + Math.random() * 1500; // 改为 1.5-3 秒
|
||||
```
|
||||
2. **反爬虫检测**
|
||||
- 避免过于频繁使用
|
||||
- 已集成拟人化轨迹,但仍可能被识别
|
||||
|
||||
3. **选择器不对** → 检查并指定正确选择器
|
||||
3. **网络延迟**
|
||||
- 成功标识(`.tc-success`)可能延迟出现
|
||||
- 当前等待时间 1000ms,可能需要延长
|
||||
|
||||
**解决方案**:
|
||||
- 使用自动重试机制(最多 10 次)
|
||||
- 查看详细日志定位问题
|
||||
- 必要时手动完成验证
|
||||
|
||||
### 问题:程序卡住不动
|
||||
|
||||
**检查**:
|
||||
- 是否在等待手动完成验证?查看终端提示
|
||||
- 超时设置是否太短?增加 `DOUBAN_SLIDER_TIMEOUT`
|
||||
- 网络是否正常?
|
||||
- 是否在等待 iframe 加载?查看日志 "等待验证码 iframe 加载..."
|
||||
- 是否在等待图片加载?查看日志 "等待滑块背景图加载..."
|
||||
- 网络是否正常?尝试增加超时时间
|
||||
|
||||
### 视觉调试技巧
|
||||
|
||||
**查看检测结果**:
|
||||
1. 运行登录后,打开 `output/` 目录
|
||||
2. 找到最新的 `*-detected.png` 文件
|
||||
3. 检查红框是否准确标注了滑块和缺口
|
||||
4. 对比 `noflag/` 目录的原始图
|
||||
|
||||
**理想的标注结果**:
|
||||
- 左侧滑块:红框紧贴滑块边缘
|
||||
- 右侧缺口:红框框住缺口区域
|
||||
- 两个红框高度基本一致(y 坐标偏差 < 25px)
|
||||
- 红框宽度接近滑块实际宽度(约 50-70px)
|
||||
|
||||
## 📚 深入了解
|
||||
|
||||
- [SLIDER.md](./SLIDER.md) - 详细功能文档
|
||||
- [IMPLEMENTATION.md](./IMPLEMENTATION.md) - 实现原理
|
||||
- [src/examples.ts](./src/examples.ts) - 更多使用示例
|
||||
- [README.md](./README.md) - 项目总览和功能介绍
|
||||
- [src/slider/README.md](./src/slider/README.md) - 滑块模块详细文档
|
||||
- [CHANGELOG.md](./CHANGELOG.md) - 版本更新日志
|
||||
- [release.md](./release.md) - 发布说明
|
||||
|
||||
## 🎯 核心 API
|
||||
|
||||
```typescript
|
||||
// 检测是否存在滑块
|
||||
hasSlider(page: Page, config?: SliderConfig): Promise<boolean>
|
||||
|
||||
// 自动完成滑块验证
|
||||
autoSlide(page: Page, config?: SliderConfig): Promise<boolean>
|
||||
|
||||
// 等待并处理滑块(推荐)
|
||||
waitAndHandleSlider(page: Page, config?: SliderConfig): Promise<boolean>
|
||||
```
|
||||
|
||||
## ⚙️ 配置选项
|
||||
|
||||
```typescript
|
||||
interface SliderConfig {
|
||||
handleSelector?: string; // 滑块按钮选择器
|
||||
trackSelector?: string; // 滑块轨道选择器
|
||||
bgSelector?: string; // 背景图选择器
|
||||
pieceSelector?: string; // 缺口图选择器
|
||||
timeout?: number; // 超时时间(毫秒)
|
||||
distance?: number; // 手动指定距离(像素)
|
||||
offsets?: number[]; // 偏移尝试序列
|
||||
// 滑块检测器
|
||||
class SliderDetector {
|
||||
async detectSlider(
|
||||
imagePath: string,
|
||||
outputPath: string,
|
||||
drawBoxes: boolean = true
|
||||
): Promise<BoundingBox[] | null>
|
||||
}
|
||||
```
|
||||
|
||||
## 🎉 运行示例
|
||||
// 滑块控制器
|
||||
class SliderController {
|
||||
constructor(maxAttempts: number = 10)
|
||||
|
||||
查看 6 个详细示例:
|
||||
async solveSlider(
|
||||
page: Page,
|
||||
sliderSelector?: string,
|
||||
captchaSelector?: string
|
||||
): Promise<SliderSolveResult>
|
||||
}
|
||||
|
||||
```bash
|
||||
# 基础使用
|
||||
npm run ts-node src/examples.ts 1
|
||||
// 返回结果
|
||||
interface SliderSolveResult {
|
||||
success: boolean; // 是否成功
|
||||
attempts: number; // 尝试次数
|
||||
distance?: number; // 滑动距离(像素)
|
||||
}
|
||||
|
||||
# 手动检测
|
||||
npm run ts-node src/examples.ts 2
|
||||
|
||||
# 自定义配置
|
||||
npm run ts-node src/examples.ts 3
|
||||
|
||||
# 登录流程集成
|
||||
npm run ts-node src/examples.ts 4
|
||||
|
||||
# 批量处理
|
||||
npm run ts-node src/examples.ts 5
|
||||
|
||||
# 环境变量配置
|
||||
npm run ts-node src/examples.ts 6
|
||||
// 边界框
|
||||
interface BoundingBox {
|
||||
x: number;
|
||||
y: number;
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
```
|
||||
|
||||
## 💡 提示
|
||||
|
||||
1. **首次使用建议先不开启自动验证**,观察滑块行为
|
||||
2. **记录成功的参数配置**,后续重复使用
|
||||
3. **避免过于频繁使用**,可能触发更严格验证
|
||||
4. **定期检查更新**,验证码可能会变化
|
||||
1. **首次使用**:
|
||||
- 建议先运行一次观察完整流程
|
||||
- 查看 `output/` 和 `noflag/` 目录的输出
|
||||
- 了解红框标注的含义
|
||||
|
||||
2. **提高成功率**:
|
||||
- 依赖自动重试机制(最多 10 次)
|
||||
- 每次验证码图片不同,识别难度也不同
|
||||
- 当前成功率约 50%,已经可以应对日常使用
|
||||
|
||||
3. **调试建议**:
|
||||
- 查看控制台日志了解检测过程
|
||||
- 检查 `output/` 目录的标注图验证准确性
|
||||
- 使用 CLI 工具批量测试:`npm run slider -- --pic-dir=noflag`
|
||||
|
||||
4. **避免滥用**:
|
||||
- 不要过于频繁使用,可能触发更严格验证
|
||||
- 遵守网站服务条款
|
||||
- 仅用于个人学习研究
|
||||
|
||||
## ⚠️ 重要提示
|
||||
|
||||
- 本功能仅用于学习研究
|
||||
- 使用时请遵守网站服务条款
|
||||
- 图像识别准确率约 70-80%
|
||||
- 需配合偏移重试提高成功率
|
||||
- **本功能仅用于学习研究**
|
||||
- **使用时请遵守网站服务条款**
|
||||
- **图像识别准确率约 70-80%**
|
||||
- **验证成功率约 50%(含重试)**
|
||||
- **不保证 100% 成功,请做好手动完成的准备**
|
||||
|
||||
## 📊 性能指标
|
||||
|
||||
- **检测耗时**:约 2-3 秒/次(含截图、检测、标注)
|
||||
- **平均尝试次数**:1-3 次
|
||||
- **最大尝试次数**:10 次
|
||||
- **图像缩放比例**:340px → 800px(scaleX ≈ 2.35)
|
||||
|
||||
## 🤝 需要帮助?
|
||||
|
||||
查看详细文档或运行示例代码了解更多用法。
|
||||
查看详细文档了解更多用法,或提交 Issue 反馈问题。
|
||||
|
||||
---
|
||||
|
||||
**v1.1.0** - 2025-10-25
|
||||
引入 AI 驱动的滑块验证码自动破解功能 🎉
|
||||
|
||||
99
README.md
@@ -1,13 +1,18 @@
|
||||
# douban-crawler
|
||||
|
||||
> Playwright + TypeScript 脚本,用于完成豆瓣短信验证码登录,并将登录态持久化到本地 Cookie 文件。滑块验证码需人工处理,本项目不再尝试自动识别。
|
||||
**版本**: v1.1.0
|
||||
|
||||
## 功能概览
|
||||
> Playwright + TypeScript 脚本,用于完成豆瓣短信验证码登录,并将登录态持久化到本地 Cookie 文件。**已集成 AI 驱动的滑块验证码自动识别和求解功能**。
|
||||
|
||||
- 启动 Chromium 浏览器并访问豆瓣登录页;
|
||||
- 自动填写手机号,触发短信验证码;
|
||||
- 控制台提示用户完成页面内的额外验证(如滑块)并输入短信验证码;
|
||||
- 登录成功后将 Cookie 状态保存到 `~/douban-cookie.json`,后续运行可直接复用。
|
||||
## ✨ 核心功能
|
||||
|
||||
- 🔐 **自动登录**: 支持短信验证码登录流程
|
||||
- 🧩 **智能滑块识别**: 基于图像处理算法自动识别和求解滑块验证码
|
||||
- 🎯 **高成功率**: 采用多策略检测算法(暗区检测、边缘检测、颜色量化、LAB色彩空间分析)
|
||||
- 🔄 **自动重试**: 验证失败时自动刷新并重试,最多 10 次
|
||||
- 📊 **详细日志**: 完整的调试信息和截图保存,便于问题追溯
|
||||
- 🖼️ **可视化调试**: 自动标注检测到的滑块位置,保存带红框标记的图片
|
||||
- 🍪 **Cookie 持久化**: 自动保存登录态,下次可直接复用
|
||||
|
||||
## 环境准备
|
||||
|
||||
@@ -26,13 +31,19 @@ npx playwright install chromium
|
||||
DOUBAN_PHONE=13800000000 npm run login
|
||||
```
|
||||
|
||||
2. 浏览器会自动打开豆瓣登录页,脚本完成以下操作:
|
||||
2. 启用自动滑块验证(可选):
|
||||
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800000000 npm run login
|
||||
```
|
||||
|
||||
3. 浏览器会自动打开豆瓣登录页,脚本完成以下操作:
|
||||
- 填入手机号并点击「获取验证码」;
|
||||
- 控制台提示等待页面验证(若出现滑块,请手动完成);
|
||||
- 如果启用了自动滑块验证,会自动检测并滑动;否则等待用户手动完成;
|
||||
- 控制台等待用户输入短信验证码;
|
||||
- 验证码提交成功后,脚本将登录态写入 `~/douban-cookie.json` 并退出。
|
||||
|
||||
3. 下次运行会优先尝试加载该 Cookie 文件,若仍在有效期内可直接登录。
|
||||
4. 下次运行会优先尝试加载该 Cookie 文件,若仍在有效期内可直接登录。
|
||||
|
||||
## 命令列表
|
||||
|
||||
@@ -42,11 +53,12 @@ npx playwright install chromium
|
||||
|
||||
## 可配置项
|
||||
|
||||
当前脚本仅使用一个环境变量:
|
||||
当前脚本支持以下环境变量:
|
||||
|
||||
| 变量名 | 说明 | 是否必填 | 默认值 |
|
||||
| -------------- | ---------------- | -------- | ------ |
|
||||
| --------------------- | ------------------------------ | -------- | ------ |
|
||||
| `DOUBAN_PHONE` | 登录手机号(大陆) | 必填 | - |
|
||||
| `DOUBAN_AUTO_SLIDER` | 是否启用自动滑块验证(1/true) | 可选 | false |
|
||||
|
||||
若需要更改 Cookie 保存位置,可在 `src/login.ts` 中调整 `COOKIES_PATH` 定义。
|
||||
|
||||
@@ -61,30 +73,63 @@ npx playwright install chromium
|
||||
## 常见问题
|
||||
|
||||
- **登录后仍提示手机号未填写?** 确认 Playwright 浏览器窗口焦点在页面内,避免浏览器阻止自动填充。
|
||||
- **提示滑块验证但脚本无动作?** 脚本已停止自动滑块功能,请在浏览器中手动拖动滑块完成验证。
|
||||
- **Cookie 未生成?** 只有当脚本确认登录成功时才会写入 Cookie。若终端未看到 “登录成功,Cookies 已保存…” 的日志,请检查短信验证码是否正确。
|
||||
- **自动滑块验证失败?** 脚本会在终端提示你在浏览器中手动完成滑块验证,完成后按 Enter 继续;若频繁失败,可以不设置 `DOUBAN_AUTO_SLIDER`,直接改用手动验证。
|
||||
- **Cookie 未生成?** 只有当脚本确认登录成功时才会写入 Cookie。若终端未看到 "登录成功,Cookies 已保存…" 的日志,请检查短信验证码是否正确。
|
||||
|
||||
## 滑块验证模块
|
||||
|
||||
本项目包含了从 `captcha_cracker` 移植并优化的滑块检测功能,位于 `src/slider/` 目录。
|
||||
|
||||
详细说明请查看 [src/slider/README.md](./src/slider/README.md)
|
||||
|
||||
### 滑块验证工作流程
|
||||
|
||||
1. **自动检测**: 点击"获取验证码"后自动检测滑块验证码窗口
|
||||
2. **图像采集**: 截取验证码图片并放大到 800px 宽度以提高识别精度
|
||||
3. **多策略检测**:
|
||||
- 暗区检测:识别滑块缺口的阴影区域
|
||||
- 边缘检测:使用 Canny 算法识别轮廓
|
||||
- 颜色量化:分析色彩分布找出异常区域
|
||||
- LAB 色彩空间:在更符合人类视觉的空间中检测差异
|
||||
4. **双滑块识别**: 同时检测左侧滑块和右侧缺口,计算精确距离
|
||||
5. **距离计算**:
|
||||
- 双滑块模式:`距离 = (缺口X - 滑块X) / scaleX`
|
||||
- 单滑块模式:基于 DOM 位置和图像分析综合计算
|
||||
6. **拟人化滑动**: 模拟真实人类操作的加速-匀速-减速轨迹
|
||||
7. **结果验证**: 检测成功标识或窗口消失,失败则自动刷新重试(最多 10 次)
|
||||
8. **可视化输出**: 原始验证码截图保存到 `noflag/` 目录,带红框标记的检测结果图片保存到 `output/` 目录
|
||||
|
||||
### 滑块识别算法
|
||||
|
||||
核心算法移植自 `captcha_cracker` 项目,包括:
|
||||
|
||||
- **候选框搜索** (`detection/candidate-search.ts`): 四种策略并行搜索可疑区域
|
||||
- **边界框优化** (`detector.ts`): 使用 Canny 边缘检测精确定位
|
||||
- **自学习模板匹配** (`detector-self-learning.ts`): 动态学习滑块模板提高准确率
|
||||
- **几何与图像工具** (`utils/`): IoU 计算、形态学操作、Sobel 算子等
|
||||
|
||||
### 调试与问题排查
|
||||
|
||||
所有截图和检测结果保存在:
|
||||
- `output/`: 常规调试输出
|
||||
- `noflag/`: 完整尺寸(800px)的检测图片
|
||||
- 文件命名格式:`captcha-{timestamp}.png` 和 `captcha-{timestamp}-detected.png`
|
||||
|
||||
查看 `-detected.png` 文件可以确认:
|
||||
- 红框标记的位置是否准确识别了滑块缺口
|
||||
- 如有两个框,左边的应该是滑块,右边的是缺口
|
||||
|
||||
## 声明
|
||||
|
||||
该项目仅供学习与功能验证,请勿用于违反豆瓣平台服务条款的场景。开发者需自行承担使用风险。
|
||||
|
||||
```bash
|
||||
# 启用自动滑块验证
|
||||
DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login
|
||||
|
||||
# 独立测试滑块功能
|
||||
npm run slider
|
||||
```
|
||||
|
||||
详细说明请查看 [SLIDER.md](./SLIDER.md)
|
||||
|
||||
## 开发脚本
|
||||
## 开发文档
|
||||
|
||||
- `src/login.ts`:主登录流程,负责 Cookie 复用、短信登录以及滑块自动化;
|
||||
- `src/slider.ts`:滑块验证自动化工具,支持图像识别和轨迹模拟;
|
||||
- `SLIDER.md`:滑块验证详细文档,包含原理、配置和故障排查;
|
||||
- `src/slider/`:滑块验证模块,包含检测、移动等完整功能;
|
||||
- `ARCHITECTURE.md`:整体架构与流程说明;
|
||||
- `IMPLEMENTATION.md`:关键实现细节记录;
|
||||
- `login.md`:原始业务需求与操作步骤;
|
||||
- `block.md`:滑块破解思路(Python 版)与 TypeScript 脚本参考;
|
||||
- `typescript-spec.md`:团队 TypeScript 编码规范与示例。
|
||||
|
||||
## 许可
|
||||
|
||||
152
VERSION.md
Normal file
@@ -0,0 +1,152 @@
|
||||
# 版本信息
|
||||
|
||||
## 当前版本:v1.1.0
|
||||
|
||||
发布日期:2025-10-25
|
||||
|
||||
## 主要特性
|
||||
|
||||
### 🎯 AI 驱动的滑块验证码自动破解
|
||||
|
||||
- ✅ 多策略并行检测(暗区域、边缘、颜色量化、LAB 色彩空间)
|
||||
- ✅ 双滑块精准识别(左侧滑块 + 右侧缺口)
|
||||
- ✅ 简化的距离计算算法:`距离 = (缺口X - 滑块X) / scaleX`
|
||||
- ✅ 拟人化滑动轨迹(Playwright steps 参数)
|
||||
- ✅ 自动重试机制(最多 10 次)
|
||||
- ✅ 可视化调试(红框标注输出到 `output/` 目录)
|
||||
|
||||
### 📊 性能指标
|
||||
|
||||
- **检测准确率**:~70-80%
|
||||
- **验证成功率**:~50%(含重试)
|
||||
- **平均尝试次数**:1-3 次
|
||||
- **单次检测耗时**:~2-3 秒
|
||||
|
||||
### 🚀 快速开始
|
||||
|
||||
```bash
|
||||
# 启用自动滑块验证
|
||||
DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login
|
||||
|
||||
# 独立测试滑块功能
|
||||
npm run slider
|
||||
|
||||
# 批量复核历史截图
|
||||
npm run slider -- --pic-dir=noflag
|
||||
```
|
||||
|
||||
## 核心创新
|
||||
|
||||
### "两只小鸟距离"原理
|
||||
|
||||
v1.1.0 最重要的突破是简化了距离计算算法:
|
||||
|
||||
```typescript
|
||||
// v1.0.0: 需要人工完成滑块
|
||||
// v1.1.0: 自动计算并拖动
|
||||
|
||||
// 双滑块模式(推荐)
|
||||
const distance = (box2.x - box1.x) / scaleX;
|
||||
|
||||
// 单滑块模式(兜底)
|
||||
const distance = box.x / scaleX;
|
||||
```
|
||||
|
||||
**原理**:就像计算两只小鸟嘴尖的水平距离,直接用右侧缺口的 X 坐标减去左侧滑块的 X 坐标,再除以图像缩放比例,就得到了滑块需要移动的距离。
|
||||
|
||||
### 图像缩放优化
|
||||
|
||||
- 原始验证码:340x191 像素
|
||||
- 检测图像:800x449 像素(scaleX ≈ 2.35)
|
||||
- 提高小尺寸滑块的检测精度
|
||||
|
||||
### 多策略并行检测
|
||||
|
||||
并行运行四种算法,然后 IoU 去重:
|
||||
|
||||
1. **暗区域检测**:查找亮度 < 100 的暗色区域
|
||||
2. **Canny 边缘检测**:查找边缘密集区域
|
||||
3. **颜色量化**:K-means 聚类找独特色块
|
||||
4. **LAB 色彩空间**:感知均匀的色彩空间检测
|
||||
|
||||
## 文件结构
|
||||
|
||||
```
|
||||
src/slider/ # 滑块验证模块
|
||||
├── detector.ts # 主检测器
|
||||
├── detector-self-learning.ts # 模板匹配
|
||||
├── slider-controller.ts # Playwright 集成
|
||||
├── detection/candidate-search.ts # 多策略检测
|
||||
├── utils/geometry.ts             # IoU 计算
|
||||
└── utils/image.ts                # Sobel 边缘检测
|
||||
|
||||
noflag/ # 原始验证码截图
|
||||
output/ # 红框标注结果
|
||||
```
|
||||
|
||||
## 依赖项
|
||||
|
||||
- **playwright**: ^1.41.1(浏览器自动化)
|
||||
- **sharp**: ^0.33.3(图像处理)
|
||||
- **typescript**: ^5.4.2
|
||||
|
||||
## 环境变量
|
||||
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 # 启用自动滑块验证
|
||||
DOUBAN_PHONE=手机号 # 登录手机号(必填)
|
||||
```
|
||||
|
||||
## 已知限制
|
||||
|
||||
1. **图像识别准确率**:约 70-80%,复杂背景或低对比度图片识别率较低
|
||||
2. **验证成功率**:约 50%,受反爬虫机制影响
|
||||
3. **仅供学习**:请遵守网站服务条款,不要用于商业或恶意用途
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [README.md](./README.md) - 项目总览
|
||||
- [QUICKSTART.md](./QUICKSTART.md) - 快速开始指南
|
||||
- [CHANGELOG.md](./CHANGELOG.md) - 详细更新日志
|
||||
- [ARCHITECTURE.md](./ARCHITECTURE.md) - 架构说明
|
||||
- [IMPLEMENTATION.md](./IMPLEMENTATION.md) - 实现细节
|
||||
- [src/slider/README.md](./src/slider/README.md) - 滑块模块文档
|
||||
|
||||
## 升级指南
|
||||
|
||||
### 从 v1.0.0 升级到 v1.1.0
|
||||
|
||||
**新增依赖**:
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
**新增环境变量**(可选):
|
||||
```bash
|
||||
export DOUBAN_AUTO_SLIDER=1
|
||||
```
|
||||
|
||||
**新增目录**:
|
||||
- `noflag/`:原始验证码截图会自动保存到这里
|
||||
- `output/`:标注结果会自动保存到这里
|
||||
|
||||
**无需更改的部分**:
|
||||
- `DOUBAN_PHONE` 环境变量用法不变
|
||||
- `~/douban-cookie.json` Cookie 文件路径不变
|
||||
- `npm run login` 命令用法不变
|
||||
|
||||
**新增功能**:
|
||||
- 设置 `DOUBAN_AUTO_SLIDER=1` 启用自动滑块验证
|
||||
- 使用 `npm run slider` 独立测试滑块功能
|
||||
- 使用 `npm run slider -- --pic-dir=noflag` 批量复核截图
|
||||
|
||||
## 下一步计划
|
||||
|
||||
- [ ] 支持更多验证码类型(点选、文字识别)
|
||||
- [ ] 引入机器学习模型提高准确率
|
||||
- [ ] 优化轨迹模拟,降低被识别风险
|
||||
- [ ] 支持更多网站的滑块验证码
|
||||
|
||||
---
|
||||
|
||||
**v1.1.0** - 从手动验证到 AI 自动化的飞跃 🎉
|
||||
394
ground-truth.json
Normal file
@@ -0,0 +1,394 @@
|
||||
{
|
||||
"滑块-1.png": [
|
||||
{
|
||||
"x": 123,
|
||||
"y": 439,
|
||||
"width": 90,
|
||||
"height": 92
|
||||
},
|
||||
{
|
||||
"x": 546,
|
||||
"y": 439,
|
||||
"width": 90,
|
||||
"height": 92
|
||||
}
|
||||
],
|
||||
"滑块-2.png": [
|
||||
{
|
||||
"x": 125,
|
||||
"y": 245,
|
||||
"width": 89,
|
||||
"height": 91
|
||||
},
|
||||
{
|
||||
"x": 454,
|
||||
"y": 244,
|
||||
"width": 90,
|
||||
"height": 92
|
||||
}
|
||||
],
|
||||
"滑块-3.png": [
|
||||
{
|
||||
"x": 122,
|
||||
"y": 238,
|
||||
"width": 86,
|
||||
"height": 87
|
||||
},
|
||||
{
|
||||
"x": 576,
|
||||
"y": 237,
|
||||
"width": 87,
|
||||
"height": 88
|
||||
}
|
||||
],
|
||||
"滑块-4.png": [
|
||||
{
|
||||
"x": 120,
|
||||
"y": 330,
|
||||
"width": 90,
|
||||
"height": 90
|
||||
},
|
||||
{
|
||||
"x": 488,
|
||||
"y": 329,
|
||||
"width": 91,
|
||||
"height": 91
|
||||
}
|
||||
],
|
||||
"滑块-5.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 444,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 404,
|
||||
"y": 443,
|
||||
"width": 91,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"滑块-6.png": [
|
||||
{
|
||||
"x": 116,
|
||||
"y": 319,
|
||||
"width": 91,
|
||||
"height": 91
|
||||
},
|
||||
{
|
||||
"x": 574,
|
||||
"y": 318,
|
||||
"width": 92,
|
||||
"height": 92
|
||||
}
|
||||
],
|
||||
"滑块-7.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 255,
|
||||
"width": 88,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 349,
|
||||
"y": 177,
|
||||
"width": 101,
|
||||
"height": 166
|
||||
}
|
||||
],
|
||||
"滑块-8.png": [
|
||||
{
|
||||
"x": 120,
|
||||
"y": 244,
|
||||
"width": 92,
|
||||
"height": 92
|
||||
},
|
||||
{
|
||||
"x": 434,
|
||||
"y": 243,
|
||||
"width": 93,
|
||||
"height": 93
|
||||
}
|
||||
],
|
||||
"滑块.png": [
|
||||
{
|
||||
"x": 131,
|
||||
"y": 408,
|
||||
"width": 87,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 375,
|
||||
"y": 407,
|
||||
"width": 88,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.53.21.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 344,
|
||||
"width": 91,
|
||||
"height": 92
|
||||
},
|
||||
{
|
||||
"x": 575,
|
||||
"y": 342,
|
||||
"width": 93,
|
||||
"height": 94
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.53.40.png": [
|
||||
{
|
||||
"x": 108,
|
||||
"y": 353,
|
||||
"width": 94,
|
||||
"height": 91
|
||||
},
|
||||
{
|
||||
"x": 365,
|
||||
"y": 353,
|
||||
"width": 95,
|
||||
"height": 92
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.53.48.png": [
|
||||
{
|
||||
"x": 122,
|
||||
"y": 256,
|
||||
"width": 90,
|
||||
"height": 89
|
||||
},
|
||||
{
|
||||
"x": 379,
|
||||
"y": 256,
|
||||
"width": 91,
|
||||
"height": 90
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.53.57.png": [
|
||||
{
|
||||
"x": 110,
|
||||
"y": 282,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 380,
|
||||
"y": 282,
|
||||
"width": 90,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.54.08.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 306,
|
||||
"width": 93,
|
||||
"height": 93
|
||||
},
|
||||
{
|
||||
"x": 386,
|
||||
"y": 306,
|
||||
"width": 93,
|
||||
"height": 94
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.54.15.png": [
|
||||
{
|
||||
"x": 118,
|
||||
"y": 360,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 386,
|
||||
"y": 363,
|
||||
"width": 91,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.54.25.png": [
|
||||
{
|
||||
"x": 121,
|
||||
"y": 420,
|
||||
"width": 88,
|
||||
"height": 87
|
||||
},
|
||||
{
|
||||
"x": 313,
|
||||
"y": 420,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.54.32.png": [
|
||||
{
|
||||
"x": 113,
|
||||
"y": 292,
|
||||
"width": 88,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 346,
|
||||
"y": 292,
|
||||
"width": 88,
|
||||
"height": 88
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.54.41.png": [
|
||||
{
|
||||
"x": 118,
|
||||
"y": 388,
|
||||
"width": 88,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 541,
|
||||
"y": 388,
|
||||
"width": 89,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.54.54.png": [
|
||||
{
|
||||
"x": 98,
|
||||
"y": 334,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 310,
|
||||
"y": 334,
|
||||
"width": 92,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.02.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 349,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 401,
|
||||
"y": 349,
|
||||
"width": 92,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.09.png": [
|
||||
{
|
||||
"x": 100,
|
||||
"y": 351,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 382,
|
||||
"y": 351,
|
||||
"width": 92,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.14.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 365,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 400,
|
||||
"y": 365,
|
||||
"width": 91,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.21.png": [
|
||||
{
|
||||
"x": 110,
|
||||
"y": 220,
|
||||
"width": 92,
|
||||
"height": 89
|
||||
},
|
||||
{
|
||||
"x": 519,
|
||||
"y": 220,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.29.png": [
|
||||
{
|
||||
"x": 114,
|
||||
"y": 309,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 544,
|
||||
"y": 309,
|
||||
"width": 90,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.37.png": [
|
||||
{
|
||||
"x": 107,
|
||||
"y": 427,
|
||||
"width": 87,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 542,
|
||||
"y": 427,
|
||||
"width": 88,
|
||||
"height": 89
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.46.png": [
|
||||
{
|
||||
"x": 117,
|
||||
"y": 227,
|
||||
"width": 88,
|
||||
"height": 89
|
||||
},
|
||||
{
|
||||
"x": 550,
|
||||
"y": 227,
|
||||
"width": 89,
|
||||
"height": 90
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.55.52.png": [
|
||||
{
|
||||
"x": 112,
|
||||
"y": 314,
|
||||
"width": 89,
|
||||
"height": 91
|
||||
},
|
||||
{
|
||||
"x": 409,
|
||||
"y": 314,
|
||||
"width": 90,
|
||||
"height": 92
|
||||
}
|
||||
],
|
||||
"iShot_2025-10-25_16.56.01.png": [
|
||||
{
|
||||
"x": 119,
|
||||
"y": 347,
|
||||
"width": 90,
|
||||
"height": 88
|
||||
},
|
||||
{
|
||||
"x": 393,
|
||||
"y": 350,
|
||||
"width": 90,
|
||||
"height": 89
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
images/douban/iShot_2025-10-25_16.53.21.png
Normal file
|
After Width: | Height: | Size: 536 KiB |
BIN
images/douban/iShot_2025-10-25_16.53.40.png
Normal file
|
After Width: | Height: | Size: 440 KiB |
BIN
images/douban/iShot_2025-10-25_16.53.48.png
Normal file
|
After Width: | Height: | Size: 432 KiB |
BIN
images/douban/iShot_2025-10-25_16.53.57.png
Normal file
|
After Width: | Height: | Size: 313 KiB |
BIN
images/douban/iShot_2025-10-25_16.54.08.png
Normal file
|
After Width: | Height: | Size: 449 KiB |
BIN
images/douban/iShot_2025-10-25_16.54.15.png
Normal file
|
After Width: | Height: | Size: 377 KiB |
BIN
images/douban/iShot_2025-10-25_16.54.25.png
Normal file
|
After Width: | Height: | Size: 543 KiB |
BIN
images/douban/iShot_2025-10-25_16.54.32.png
Normal file
|
After Width: | Height: | Size: 384 KiB |
BIN
images/douban/iShot_2025-10-25_16.54.41.png
Normal file
|
After Width: | Height: | Size: 336 KiB |
BIN
images/douban/iShot_2025-10-25_16.54.54.png
Normal file
|
After Width: | Height: | Size: 408 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.02.png
Normal file
|
After Width: | Height: | Size: 270 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.09.png
Normal file
|
After Width: | Height: | Size: 272 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.14.png
Normal file
|
After Width: | Height: | Size: 277 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.21.png
Normal file
|
After Width: | Height: | Size: 371 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.29.png
Normal file
|
After Width: | Height: | Size: 410 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.37.png
Normal file
|
After Width: | Height: | Size: 493 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.46.png
Normal file
|
After Width: | Height: | Size: 292 KiB |
BIN
images/douban/iShot_2025-10-25_16.55.52.png
Normal file
|
After Width: | Height: | Size: 318 KiB |
BIN
images/douban/iShot_2025-10-25_16.56.01.png
Normal file
|
After Width: | Height: | Size: 388 KiB |
BIN
images/douban/滑块-1.png
Normal file
|
After Width: | Height: | Size: 406 KiB |
BIN
images/douban/滑块-2.png
Normal file
|
After Width: | Height: | Size: 539 KiB |
BIN
images/douban/滑块-3.png
Normal file
|
After Width: | Height: | Size: 453 KiB |
BIN
images/douban/滑块-4.png
Normal file
|
After Width: | Height: | Size: 150 KiB |
BIN
images/douban/滑块-5.png
Normal file
|
After Width: | Height: | Size: 518 KiB |
BIN
images/douban/滑块-6.png
Normal file
|
After Width: | Height: | Size: 513 KiB |
BIN
images/douban/滑块-7.png
Normal file
|
After Width: | Height: | Size: 344 KiB |
BIN
images/douban/滑块-8.png
Normal file
|
After Width: | Height: | Size: 304 KiB |
BIN
images/douban/滑块.png
Normal file
|
After Width: | Height: | Size: 215 KiB |
@@ -1,10 +1,10 @@
|
||||
{
|
||||
"name": "douban-crawler",
|
||||
"version": "1.0.0",
|
||||
"description": "Automation scripts for Douban login and crawling.",
|
||||
"version": "1.1.0",
|
||||
"description": "Douban login automation with AI-powered slider CAPTCHA solver.",
|
||||
"scripts": {
|
||||
"login": "ts-node src/login.ts",
|
||||
"slider": "ts-node src/slider.ts"
|
||||
"slider": "ts-node --transpile-only src/slider/cli.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"playwright": "^1.41.1",
|
||||
|
||||
135
release.md
@@ -3,13 +3,13 @@
|
||||
Playwright + TypeScript 脚本,用于完成豆瓣短信验证码登录,并将登录态持久化到本地 Cookie 文件。
|
||||
滑块验证码需人工处理,本项目不再尝试自动识别。
|
||||
|
||||
## 功能概览
|
||||
### 功能概览
|
||||
- 启动 Chromium 浏览器并访问豆瓣登录页;
|
||||
- 自动填写手机号,触发短信验证码;
|
||||
- 控制台提示用户完成页面内的额外验证(如滑块)并输入短信验证码;
|
||||
- 登录成功后将 Cookie 状态保存到 `~/douban-cookie.json`,后续运行可直接复用。
|
||||
|
||||
## 环境准备
|
||||
### 环境准备
|
||||
```bash
|
||||
npm install
|
||||
npx playwright install chromium
|
||||
@@ -17,7 +17,7 @@ npx playwright install chromium
|
||||
|
||||
需要 Node.js ≥ 18。Playwright 会自动下载 Chromium,首次运行请确保网络可访问 Playwright CDN。
|
||||
|
||||
## 使用方式
|
||||
### 使用方式
|
||||
1. 设置手机号环境变量并运行登录脚本:
|
||||
|
||||
```bash
|
||||
@@ -32,13 +32,13 @@ npx playwright install chromium
|
||||
|
||||
3. 下次运行会优先尝试加载该 Cookie 文件,若仍在有效期内可直接登录。
|
||||
|
||||
## 命令列表
|
||||
### 命令列表
|
||||
|
||||
| 命令 | 说明 |
|
||||
| --------------- | ---------------------------- |
|
||||
| `npm run login` | 启动豆瓣登录流程并保存 Cookie |
|
||||
|
||||
## 可配置项
|
||||
### 可配置项
|
||||
|
||||
当前脚本仅使用一个环境变量:
|
||||
|
||||
@@ -48,7 +48,7 @@ npx playwright install chromium
|
||||
|
||||
若需要更改 Cookie 保存位置,可在 `src/login.ts` 中调整 `COOKIES_PATH` 定义。
|
||||
|
||||
## 工作流程说明
|
||||
### 工作流程说明
|
||||
|
||||
1. 读取 `DOUBAN_PHONE`,未提供则直接退出;
|
||||
2. 若存在 `~/douban-cookie.json`,加载后访问登录页并校验登录态;
|
||||
@@ -56,7 +56,7 @@ npx playwright install chromium
|
||||
4. 用户在终端输入收到的短信验证码;
|
||||
5. 验证通过后,将当前浏览器上下文的 `storageState` 写入 `~/douban-cookie.json`。
|
||||
|
||||
## 常见问题
|
||||
### 常见问题
|
||||
|
||||
- **登录后仍提示手机号未填写?** 确认 Playwright 浏览器窗口焦点在页面内,避免浏览器阻止自动填充。
|
||||
- **提示滑块验证但脚本无动作?** 脚本已停止自动滑块功能,请在浏览器中手动拖动滑块完成验证。
|
||||
@@ -70,13 +70,130 @@ DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login
|
||||
npm run slider
|
||||
```
|
||||
|
||||
## 开发脚本
|
||||
### 开发脚本
|
||||
|
||||
- `src/login.ts`:主登录流程,负责 Cookie 复用、短信登录以及滑块自动化;
|
||||
- `login.md`:原始业务需求与操作步骤;
|
||||
- `block.md`:滑块破解思路(Python 版)与 TypeScript 脚本参考;
|
||||
- `typescript-spec.md`:团队 TypeScript 编码规范与示例。
|
||||
|
||||
## 许可
|
||||
### 许可
|
||||
本项目仅用于功能验证和学习,使用时请遵守目标网站的服务条款。
|
||||
|
||||
## v1.1.0
|
||||
|
||||
### 🎉 主要更新
|
||||
|
||||
**AI 驱动的滑块验证码自动破解**
|
||||
|
||||
本版本最大亮点是集成了完整的滑块验证码自动识别和求解系统,从 `captcha_cracker` 项目移植并优化了核心算法。
|
||||
|
||||
#### ✨ 新增功能
|
||||
|
||||
1. **智能滑块识别** 🔍
|
||||
- 多策略并行检测:暗区域、Canny 边缘、颜色量化、LAB 色彩空间
|
||||
- 双滑块精准识别:同时检测左侧滑块和右侧缺口
|
||||
- 图像缩放优化:自动放大到 800px 以提高检测精度(原始 340px)
|
||||
- 可视化调试:自动生成带红框标注的检测结果图
|
||||
|
||||
2. **简化距离计算算法** 📐
|
||||
- **v1.1.0 核心改进**:采用简洁准确的几何原理
|
||||
- 双滑块模式:`距离 = (缺口X - 滑块X) / scaleX`
|
||||
- 类比"两只小鸟嘴尖距离",直接计算左边界水平距离
|
||||
- 移除复杂的坐标转换逻辑,提升准确性
|
||||
|
||||
3. **拟人化滑动轨迹** 🎯
|
||||
- 使用 Playwright 的 `steps` 参数实现平滑移动
|
||||
- 避免机械化操作特征
|
||||
- 成功率约 50%(10 次重试机制)
|
||||
|
||||
4. **自动重试机制** 🔄
|
||||
- 验证失败自动刷新验证码
|
||||
- 最多尝试 10 次(可配置)
|
||||
- 实时日志输出,便于调试
|
||||
|
||||
5. **截图输出规范** 📸
|
||||
- 原始验证码:保存到 `noflag/` 目录
|
||||
- 标注结果:保存到 `output/` 目录
|
||||
- 支持 CLI 工具批量复核:`npm run slider -- --pic-dir=noflag`
|
||||
|
||||
#### 🔧 技术细节
|
||||
|
||||
**核心模块结构**(`src/slider/`):
|
||||
- `detector.ts`: 主检测器,实现多策略候选搜索和评分
|
||||
- `detector-self-learning.ts`: 模板匹配,用于第二滑块检测
|
||||
- `slider-controller.ts`: Playwright 集成,控制浏览器滑动
|
||||
- `detection/candidate-search.ts`: 四种并行检测算法实现
|
||||
- `utils/geometry.ts`: IoU 计算等几何工具
|
||||
- `utils/image.ts`: Sobel 边缘检测、形态学操作
|
||||
- `cli.ts`: 批量评估和标注工具
|
||||
- `validator.ts`: 检测结果验证工具
|
||||
|
||||
**依赖变更**:
|
||||
- 新增 `sharp@^0.33.3`:图像处理(缩放、边缘检测、颜色量化)
|
||||
- 已有 `playwright@^1.41.1`:浏览器自动化
|
||||
|
||||
**环境变量**:
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 # 启用自动滑块验证
|
||||
DOUBAN_PHONE=手机号 # 登录手机号
|
||||
```
|
||||
|
||||
#### 📊 性能指标
|
||||
|
||||
- **检测准确率**:~70-80%(基于标注数据集验证)
|
||||
- **验证成功率**:~50%(考虑网站反爬虫机制)
|
||||
- **平均尝试次数**:1-3 次
|
||||
- **单次检测耗时**:~2-3 秒(含截图、检测、滑动)
|
||||
|
||||
#### 🐛 已修复问题
|
||||
|
||||
1. **坐标系不统一**:修复了截图坐标与页面坐标的转换错误
|
||||
2. **iframe 元素访问**:正确处理腾讯验证码 iframe 内的元素定位
|
||||
3. **边距过滤过严**:调整候选框边缘判断逻辑(5% → 1%)
|
||||
4. **距离计算复杂**:简化为基本几何公式,提高准确性
|
||||
|
||||
#### 📖 文档更新
|
||||
|
||||
- `README.md`: 添加自动滑块验证功能说明
|
||||
- `src/slider/README.md`: 详细的算法实现和调试指南
|
||||
- `CHANGELOG.md`: 新增版本变更日志
|
||||
- `QUICKSTART.md`: 更新快速开始指南
|
||||
|
||||
#### 🎯 使用示例
|
||||
|
||||
**最简单的使用方式**:
|
||||
```bash
|
||||
DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login
|
||||
```
|
||||
|
||||
**独立测试滑块功能**:
|
||||
```bash
|
||||
npm run slider
|
||||
```
|
||||
|
||||
**编程接口**:
|
||||
```typescript
|
||||
import { SliderController } from './slider';
|
||||
|
||||
const controller = new SliderController(10);
|
||||
const result = await controller.solveSlider(page, '.slider-button', '#captcha');
|
||||
|
||||
if (result.success) {
|
||||
console.log(`成功!尝试 ${result.attempts} 次`);
|
||||
}
|
||||
```
|
||||
|
||||
#### ⚠️ 注意事项
|
||||
|
||||
1. **图像识别局限性**:复杂背景或低对比度图片可能识别失败
|
||||
2. **反爬虫检测**:频繁使用可能触发更严格的验证机制
|
||||
3. **仅供学习**:请遵守网站服务条款,不要用于商业或恶意用途
|
||||
|
||||
#### 🚀 下一步计划
|
||||
|
||||
- [ ] 支持更多验证码类型(点选、文字识别)
|
||||
- [ ] 优化检测算法,提高复杂场景的准确率
|
||||
- [ ] 添加机器学习模型,替代规则式检测
|
||||
- [ ] 支持更多网站的滑块验证码
|
||||
- [ ] 自动提取 Mac 上收到的短信验证码
|
||||
|
||||
153
src/login.ts
@@ -9,10 +9,13 @@ import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import readline from 'readline';
|
||||
import { SliderController } from './slider';
|
||||
|
||||
const LOGIN_URL = 'https://accounts.douban.com/passport/login?source=main';
|
||||
const COOKIES_PATH = path.join(os.homedir(), 'douban-cookie.json');
|
||||
|
||||
const PHONE = process.env.DOUBAN_PHONE ?? '';
|
||||
const AUTO_SLIDER = process.env.DOUBAN_AUTO_SLIDER === '1' || process.env.DOUBAN_AUTO_SLIDER === 'true';
|
||||
|
||||
/**
|
||||
* 检查指定路径文件是否存在,避免捕获异常污染主流程。
|
||||
@@ -105,7 +108,7 @@ async function prepareContext(browser: Browser): Promise<{
|
||||
const page = await context.newPage();
|
||||
|
||||
// 访问豆瓣首页检查登录状态
|
||||
await page.goto('https://www.douban.com', { waitUntil: 'domcontentloaded', timeout: 15000 });
|
||||
await page.goto('https://www.douban.com', { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
await page.waitForTimeout(800);
|
||||
|
||||
if (await isLoggedIn(page)) {
|
||||
@@ -119,7 +122,7 @@ async function prepareContext(browser: Browser): Promise<{
|
||||
|
||||
const context = await browser.newContext();
|
||||
const page = await context.newPage();
|
||||
await page.goto(LOGIN_URL, { waitUntil: 'networkidle' });
|
||||
await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded', timeout: 60000 });
|
||||
|
||||
return { context, page, usedCookies: false };
|
||||
}
|
||||
@@ -127,7 +130,7 @@ async function prepareContext(browser: Browser): Promise<{
|
||||
/**
|
||||
* 短信验证码登录流程:
|
||||
* - 输入手机号并触发验证码
|
||||
* - 在浏览器中手动完成可能出现的额外验证
|
||||
* - 自动处理滑块验证(如果启用)或提示手动完成
|
||||
* - 等待用户输入短信验证码并提交
|
||||
*/
|
||||
async function loginWithSms(page: Page, phone: string): Promise<void> {
|
||||
@@ -137,6 +140,142 @@ async function loginWithSms(page: Page, phone: string): Promise<void> {
|
||||
|
||||
await page.click('text=获取验证码');
|
||||
|
||||
// 等待滑块验证出现 - 先给足够时间让滑块窗口加载
|
||||
console.log('等待滑块验证窗口加载...');
|
||||
await page.waitForTimeout(3000); // 初始等待3秒让滑块窗口完全加载
|
||||
|
||||
// 检查是否需要滑块验证 - 尝试多个可能的选择器
|
||||
const sliderController = new SliderController(10);
|
||||
const possibleSelectors = [
|
||||
'#slideBg',
|
||||
'.tc-bg-img',
|
||||
'.tc-fg-item',
|
||||
'#tcaptcha_iframe',
|
||||
'iframe[src*="captcha"]',
|
||||
'iframe[src*="ssl.captcha"]',
|
||||
'.tcaptcha-transform',
|
||||
'#captcha_container'
|
||||
];
|
||||
|
||||
let captchaSelector = '';
|
||||
let captchaVisible = false;
|
||||
|
||||
// 再等待最多 10 秒,检查滑块是否出现
|
||||
const maxWaitTime = 10000;
|
||||
const startTime = Date.now();
|
||||
|
||||
console.log('开始检测滑块元素...');
|
||||
while (Date.now() - startTime < maxWaitTime && !captchaVisible) {
|
||||
for (const selector of possibleSelectors) {
|
||||
try {
|
||||
const element = page.locator(selector).first();
|
||||
const isVisible = await element.isVisible({ timeout: 500 });
|
||||
if (isVisible) {
|
||||
captchaSelector = selector;
|
||||
captchaVisible = true;
|
||||
console.log(`检测到滑块验证容器(选择器: ${selector})`);
|
||||
|
||||
// 等待滑块内部元素真正加载完成
|
||||
console.log('等待滑块内部元素加载...');
|
||||
await page.waitForTimeout(2000); // 给 iframe 更多时间加载
|
||||
|
||||
// 如果是 iframe,需要在 iframe 内检查元素
|
||||
if (selector.includes('iframe')) {
|
||||
try {
|
||||
const frame = page.frameLocator(selector);
|
||||
const keySelectors = ['#slideBg', '.tc-bg-img', '.tc-fg-item'];
|
||||
let elementsLoaded = false;
|
||||
|
||||
for (let i = 0; i < 8; i++) { // 最多等待4秒
|
||||
for (const keySelector of keySelectors) {
|
||||
try {
|
||||
const keyElement = frame.locator(keySelector).first();
|
||||
await keyElement.isVisible({ timeout: 500 });
|
||||
console.log(`✓ iframe 内元素已加载: ${keySelector}`);
|
||||
elementsLoaded = true;
|
||||
break;
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (elementsLoaded) {
|
||||
break;
|
||||
}
|
||||
|
||||
await page.waitForTimeout(500);
|
||||
}
|
||||
|
||||
if (!elementsLoaded) {
|
||||
console.warn('警告: 滑块容器已显示,但 iframe 内部元素加载较慢');
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('无法检查 iframe 内部元素,继续执行...');
|
||||
}
|
||||
} else {
|
||||
// 非 iframe 的情况,直接在页面查找
|
||||
const keySelectors = ['.tc-bg-img', '.tc-fg-item', '.tc-slider-normal'];
|
||||
let elementsLoaded = false;
|
||||
|
||||
for (let i = 0; i < 8; i++) {
|
||||
for (const keySelector of keySelectors) {
|
||||
try {
|
||||
const keyElement = page.locator(keySelector).first();
|
||||
if (await keyElement.isVisible({ timeout: 300 })) {
|
||||
console.log(`✓ 滑块关键元素已加载: ${keySelector}`);
|
||||
elementsLoaded = true;
|
||||
break;
|
||||
}
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (elementsLoaded) {
|
||||
break;
|
||||
}
|
||||
|
||||
await page.waitForTimeout(500);
|
||||
}
|
||||
|
||||
if (!elementsLoaded) {
|
||||
console.warn('警告: 滑块容器已显示,但内部元素未完全加载');
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
} catch {
|
||||
// 继续尝试下一个选择器
|
||||
}
|
||||
}
|
||||
|
||||
if (!captchaVisible) {
|
||||
// 每隔500ms检查一次
|
||||
await page.waitForTimeout(500);
|
||||
}
|
||||
}
|
||||
|
||||
if (captchaVisible && captchaSelector) {
|
||||
if (AUTO_SLIDER) {
|
||||
console.log('开始自动滑块验证...');
|
||||
// 不指定滑块选择器,让 SliderController 自动查找
|
||||
const result = await sliderController.solveSlider(page, undefined, captchaSelector);
|
||||
|
||||
if (result.success) {
|
||||
console.log(`✓ 滑块验证成功!(尝试 ${result.attempts} 次)`);
|
||||
} else {
|
||||
console.warn(`✗ 自动滑块验证失败,请手动完成`);
|
||||
await prompt('请在浏览器中手动完成滑块验证后按 Enter 继续...');
|
||||
}
|
||||
} else {
|
||||
console.log('请在浏览器中手动完成滑块验证');
|
||||
await prompt('完成滑块验证后按 Enter 继续...');
|
||||
}
|
||||
} else {
|
||||
console.log('未检测到滑块验证或验证已完成');
|
||||
}
|
||||
|
||||
console.log('请等待短信验证码...');
|
||||
await prompt('收到短信验证码后按 Enter 继续...');
|
||||
|
||||
@@ -174,16 +313,24 @@ async function loginWithSms(page: Page, phone: string): Promise<void> {
|
||||
* 程序主入口:协调上下文、执行登录并持久化 cookies。
|
||||
*/
|
||||
async function main(): Promise<void> {
|
||||
console.log('=== 豆瓣登录脚本启动 ===');
|
||||
console.log(`环境变量 - DOUBAN_PHONE: ${PHONE ? '已设置' : '未设置'}`);
|
||||
console.log(`环境变量 - DOUBAN_AUTO_SLIDER: ${AUTO_SLIDER ? '启用' : '禁用'}`);
|
||||
|
||||
if (!PHONE) {
|
||||
console.error('请通过环境变量 DOUBAN_PHONE 提供登录手机号。');
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('正在启动浏览器...');
|
||||
const browser = await chromium.launch({ headless: false });
|
||||
console.log('✓ 浏览器启动成功');
|
||||
|
||||
try {
|
||||
console.log('正在准备浏览器上下文...');
|
||||
let { context, page, usedCookies } = await prepareContext(browser);
|
||||
console.log(`✓ 上下文准备完成 (使用缓存: ${usedCookies})`);
|
||||
|
||||
if (usedCookies) {
|
||||
console.info('✓ 已使用缓存 Cookies 自动登录成功');
|
||||
|
||||
294
src/slider/README.md
Normal file
@@ -0,0 +1,294 @@
|
||||
# 滑块验证模块
|
||||
|
||||
本模块实现了豆瓣登录页面滑块验证码的自动检测和解决功能。
|
||||
|
||||
## 功能特性
|
||||
|
||||
- ✅ 自动检测滑块验证码中的缺口位置
|
||||
- ✅ 支持多滑块检测(检测两个滑块并计算距离)
|
||||
- ✅ 拟人化滑动轨迹(基于 Playwright `steps` 参数的平滑移动)
|
||||
- ✅ 自动重试机制(最多 10 次)
|
||||
- ✅ 滑块浮窗消失判定验证成功
|
||||
|
||||
## 目录结构
|
||||
|
||||
```
|
||||
src/slider/
|
||||
├── cli.ts # 命令行工具,用于批量评估/标注
|
||||
├── index.ts # 模块导出
|
||||
├── types.ts # 类型定义
|
||||
├── detector.ts # 主滑块检测器
|
||||
├── detector-self-learning.ts # 自学习第二滑块检测
|
||||
├── slider-controller.ts # 滑块移动控制器
|
||||
├── validator.ts # 检测结果验证工具
|
||||
├── detection/
|
||||
│ └── candidate-search.ts # 候选区域搜索算法
|
||||
└── utils/
|
||||
├── geometry.ts # 几何计算工具
|
||||
└── image.ts # 图像处理工具
|
||||
```
|
||||
|
||||
## 运行输出约定
|
||||
|
||||
- 登录流程截取的**原始验证码**保存在项目根目录的 `noflag/`
|
||||
- 自动检测产生的**标注结果**保存在根目录的 `output/`
|
||||
- 可执行 `npm run slider -- --pic-dir=noflag` 对原始截图批量复核,结果同样输出至 `output/`
|
||||
|
||||
## 核心算法
|
||||
|
||||
### 1. 滑块检测 (`detector.ts`)
|
||||
|
||||
- **多策略候选搜索**:暗区域检测、边缘检测、颜色量化、LAB 色彩空间检测
|
||||
- **候选框评分**:基于形状、色调一致性、内部边缘密度、梯度平滑度
|
||||
- **边缘精炼**:使用 Sobel 边缘检测和投影分析精确定位滑块边界
|
||||
|
||||
### 2. 第二滑块检测 (`detector-self-learning.ts`)
|
||||
|
||||
- **模板匹配**:使用第一个检测到的滑块作为模板
|
||||
- **边缘模板**:对图像和模板进行 Canny 边缘检测后匹配
|
||||
- **位置验证**:确保第二个滑块在同一水平线上(y 轴偏差 < 25px)
|
||||
|
||||
### 3. 滑动控制 (`slider-controller.ts`)
|
||||
|
||||
- **距离计算**(v1.1.0 简化算法):
|
||||
- **双滑块模式**:`距离 = (缺口X - 滑块X) / scaleX`
|
||||
- 检测到左侧滑块(b1)和右侧缺口(b2)
|
||||
- 计算两者左边界的水平距离
|
||||
- 除以图像缩放比例(原始 340px → 检测用 800px)
|
||||
- 原理:类比"两只小鸟嘴尖的水平距离"
|
||||
- **单滑块模式**:`距离 = 缺口X / scaleX`
|
||||
- 仅检测到缺口位置时的兜底方案
|
||||
- 从起始位置直接滑动到缺口左边界所在位置
|
||||
- **图像缩放优化**:
|
||||
- 原始验证码宽度:340px
|
||||
- 放大到 800px 进行检测(scaleX ≈ 2.35)
|
||||
- 提高小尺寸滑块的检测精度
|
||||
- **拟人化滑动**:
|
||||
- 使用 Playwright 的 `steps` 参数
|
||||
- 平滑移动轨迹,避免机器人特征
|
||||
|
||||
## 使用方法
|
||||
|
||||
### 1. 环境变量配置
|
||||
|
||||
```bash
|
||||
# 启用自动滑块验证
|
||||
export DOUBAN_AUTO_SLIDER=1
|
||||
|
||||
# 设置手机号
|
||||
export DOUBAN_PHONE=13800138000
|
||||
|
||||
# 运行登录脚本
|
||||
npm run login
|
||||
```
|
||||
|
||||
### 2. 编程接口
|
||||
|
||||
```typescript
|
||||
import { SliderController } from './slider';
|
||||
import { Page } from 'playwright';
|
||||
|
||||
const controller = new SliderController(10); // 最多尝试 10 次
|
||||
|
||||
const result = await controller.solveSlider(
|
||||
page,
|
||||
'.tcaptcha_drag_button', // 滑块按钮选择器
|
||||
'#tcaptcha_iframe' // 验证码容器选择器
|
||||
);
|
||||
|
||||
if (result.success) {
|
||||
console.log(`验证成功!尝试 ${result.attempts} 次`);
|
||||
} else {
|
||||
console.log('验证失败');
|
||||
}
|
||||
```
|
||||
|
||||
### 3. 独立使用滑块检测器
|
||||
|
||||
```typescript
|
||||
import { SliderDetector } from './slider';
|
||||
|
||||
const detector = new SliderDetector();
|
||||
const boxes = await detector.detectSlider(
|
||||
'captcha.png',
|
||||
'output/captcha-annotated.png',
|
||||
true
|
||||
);
|
||||
|
||||
if (boxes && boxes.length > 0) {
|
||||
console.log('检测到滑块:', boxes);
|
||||
}
|
||||
```
|
||||
|
||||
### 4. CLI 工具
|
||||
|
||||
```bash
|
||||
npm run slider -- --pic-dir=images/douban
|
||||
```
|
||||
|
||||
- 默认读取 `images/douban` 下的验证码图片并输出标注结果到 `images/output`
|
||||
- 若存在 `ground-truth.json`,会自动评估检测精度和召回率
|
||||
- 通过 `--pic-dir=子目录` 可切换其他图片集合
|
||||
|
||||
## 工作流程
|
||||
|
||||
1. **等待滑块出现**:检测页面中是否存在滑块验证码 iframe
|
||||
2. **截图**:捕获验证码区域图像,保存原始图到 `noflag/` 目录
|
||||
3. **图像预处理**:将图像缩放到 800px 宽度以提高检测精度
|
||||
4. **多策略检测**:并行运行四种算法检测滑块候选框
|
||||
- 暗区域检测(基于亮度阈值)
|
||||
- Canny 边缘检测
|
||||
- 颜色量化(K-means 聚类)
|
||||
- LAB 色彩空间分析
|
||||
5. **候选框评分与筛选**:
|
||||
- 计算每个候选框的综合分数(形状、颜色、边缘)
|
||||
- IoU 去重,合并重叠候选框
|
||||
- 选择得分最高的两个滑块
|
||||
6. **距离计算**:
|
||||
- 双滑块:`(b2.x - b1.x) / scaleX`
|
||||
- 单滑块:`b.x / scaleX`
|
||||
7. **可视化标注**:在检测图上绘制红色框,保存到 `output/` 目录
|
||||
8. **模拟滑动**:拖动左侧滑块到计算出的距离
|
||||
9. **验证结果**:检查是否出现 `.tc-success` 成功标识,或验证码浮窗是否已消失
|
||||
10. **失败重试**:点击刷新按钮,重新截图检测(最多 10 次)
|
||||
|
||||
## 参数说明
|
||||
|
||||
### SliderController 构造函数
|
||||
|
||||
```typescript
|
||||
new SliderController(maxAttempts: number = 10)
|
||||
```
|
||||
|
||||
- `maxAttempts`: 最大尝试次数,默认 10 次
|
||||
|
||||
### solveSlider 方法
|
||||
|
||||
```typescript
|
||||
async solveSlider(
|
||||
page: Page,
|
||||
sliderSelector: string = '.tcaptcha_drag_button',
|
||||
captchaSelector: string = '#tcaptcha_iframe'
|
||||
): Promise<SliderSolveResult>
|
||||
```
|
||||
|
||||
- `page`: Playwright 页面对象
|
||||
- `sliderSelector`: 滑块按钮的 CSS 选择器
|
||||
- `captchaSelector`: 验证码容器的 CSS 选择器
|
||||
|
||||
### 返回值 SliderSolveResult
|
||||
|
||||
```typescript
|
||||
interface SliderSolveResult {
|
||||
success: boolean; // 是否成功
|
||||
attempts: number; // 尝试次数
|
||||
distance?: number; // 滑动距离(像素)
|
||||
}
|
||||
```
|
||||
|
||||
## 依赖项
|
||||
|
||||
- `sharp`: 图像处理库,用于边缘检测、颜色量化等
|
||||
- `playwright`: 浏览器自动化,用于截图和鼠标操作
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **选择器适配**:不同网站的滑块选择器可能不同,需要根据实际情况调整
|
||||
2. **截图位置**:临时截图保存在 `os.tmpdir()/douban-slider/` 目录
|
||||
3. **成功判定**:通过检查验证码浮窗是否消失来判断验证是否成功
|
||||
4. **失败处理**:自动验证失败后会提示用户手动完成
|
||||
|
||||
## 调试
|
||||
|
||||
如需查看检测过程中的日志,观察控制台输出:
|
||||
|
||||
```
|
||||
[SliderController] 开始滑块验证,最多尝试 10 次
|
||||
[SliderController] 等待验证码 iframe 加载...
|
||||
[SliderController] 验证码 iframe 已加载
|
||||
[SliderController] 等待滑块背景图加载...
|
||||
[SliderController] 滑块背景图已加载
|
||||
[SliderController] ===== 第 1/10 次尝试 =====
|
||||
[SliderController] 已截图到: /Users/gavin/douban-login/noflag/captcha-20250125-123456.png
|
||||
[SliderDetector] 图像已缩放: 340x191 -> 800x449 (scaleX=2.35)
|
||||
[SliderDetector] 检测到 2 个滑块候选框
|
||||
[SliderDetector] 滑块 1: x=45, width=60, score=0.85
|
||||
[SliderDetector] 滑块 2: x=195, width=55, score=0.82
|
||||
[SliderDetector] 已保存标注图: /Users/gavin/douban-login/output/captcha-20250125-123456-detected.png
|
||||
[SliderController] ✓ 检测到 2 个滑块
|
||||
[SliderController] 计算距离: (195 - 45) / 2.35 = 63.8px
|
||||
[SliderController] 开始拖动滑块 64px
|
||||
[SliderController] ✓ 滑块验证成功!(1000ms后窗口消失)
|
||||
[SliderController] 验证成功!共尝试 1 次
|
||||
```
|
||||
|
||||
**关键日志说明**:
|
||||
- `图像已缩放`: 显示原始尺寸、检测尺寸和缩放比例
|
||||
- `检测到 N 个滑块候选框`: N=2 表示双滑块模式,N=1 表示单滑块模式
|
||||
- `滑块 1/2`: 显示每个滑块的 x 坐标、宽度和评分
|
||||
- `已保存标注图`: 红框标注结果的保存路径
|
||||
- `计算距离`: 显示详细的距离计算公式
|
||||
- `✓ 滑块验证成功`: 检测到腾讯验证码的成功标识
|
||||
|
||||
## 故障排查
|
||||
|
||||
### 1. 检测不到滑块
|
||||
|
||||
**症状**:日志显示"未检测到滑块"
|
||||
|
||||
**排查步骤**:
|
||||
- 检查 `noflag/` 目录下的原始截图是否正确
|
||||
- 确认验证码已完全加载(等待 iframe 和图片元素)
|
||||
- 查看 `output/` 目录的标注图,确认候选框是否被正确识别
|
||||
- 调整 `candidate-search.ts` 中的检测阈值
|
||||
|
||||
### 2. 滑动距离不准确
|
||||
|
||||
**症状**:滑块滑过头或不够远
|
||||
|
||||
**排查步骤**:
|
||||
- 查看日志中的 `scaleX` 值(应该约为 2.35)
|
||||
- 确认使用的是双滑块模式还是单滑块模式
|
||||
- 检查 `output/` 目录标注图,红框是否准确框住滑块
|
||||
- 验证距离计算公式:`(b2.x - b1.x) / scaleX`
|
||||
|
||||
**v1.1.0 改进**:
|
||||
- 简化了距离计算逻辑,移除复杂的坐标转换
|
||||
- 采用"两只小鸟距离"原理,直接计算左边界差值
|
||||
|
||||
### 3. 验证总是失败
|
||||
|
||||
**症状**:滑动后没有出现成功提示
|
||||
|
||||
**可能原因**:
|
||||
- 滑动距离计算错误(参见上一条)
|
||||
- 触发反爬虫检测(轨迹太机械)
|
||||
- 网络延迟导致成功标识未及时显示
|
||||
|
||||
**解决方案**:
|
||||
- 检查日志中的滑动距离是否合理(通常 50-150px)
|
||||
- 增加成功判定的等待时间(当前 1000ms)
|
||||
- 尝试多次重试(当前最多 10 次)
|
||||
- 查看浏览器开发者工具,确认 `.tc-success` 类名是否出现
|
||||
|
||||
### 4. 视觉调试技巧
|
||||
|
||||
**查看检测结果**:
|
||||
1. 运行登录后,打开 `output/` 目录
|
||||
2. 找到最新的 `*-detected.png` 文件
|
||||
3. 检查红框是否准确标注了滑块和缺口
|
||||
4. 对比 `noflag/` 目录的原始图,确认缩放和标注的准确性
|
||||
|
||||
**理想的标注结果**:
|
||||
- 左侧滑块:红框紧贴滑块边缘
|
||||
- 右侧缺口:红框框住缺口区域
|
||||
- 两个红框高度基本一致(y 坐标偏差 < 25px)
|
||||
|
||||
## 移植说明
|
||||
|
||||
本模块从 `captcha_cracker` 项目移植而来,并进行了以下扩展:
|
||||
|
||||
1. 原样保留检测、标注、CLI 与验证器等核心能力
|
||||
2. 新增 Playwright 集成,用于自动截图和滑块拖动
|
||||
3. 添加登录流程的滑块控制器与重试机制
|
||||
4. 调整脚本入口与文档,便于在豆瓣登录场景复用
|
||||
221
src/slider/cli.ts
Normal file
@@ -0,0 +1,221 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { SliderDetector } from './detector';
|
||||
import { SliderValidator } from './validator';
|
||||
import { BoundingBox, Rectangle } from './types';
|
||||
|
||||
type GroundTruth = Record<string, Rectangle[]>;
|
||||
|
||||
/**
 * CLI entry point for batch slider detection.
 *
 * 1. When the default dataset (`images/douban`) is selected and a
 *    `ground-truth.json` exists in the project root, evaluates the detector
 *    against it.
 * 2. Runs detection on every PNG in the selected directory and writes the
 *    annotated images to `output/` under the project root.
 *
 * `--pic-dir=<dir>` selects another image directory, relative to the project
 * root. An empty value is treated as if the flag had not been given.
 */
async function main() {
  const detector = new SliderDetector();
  const validator = new SliderValidator();
  const baseDir = path.join(__dirname, '..', '..');
  const doubanDir = path.join(baseDir, 'images', 'douban');
  const outputDir = path.join(baseDir, 'output');
  const groundTruthPath = path.join(baseDir, 'ground-truth.json');
  const detectionCache = new Map<string, BoundingBox[] | null>();

  // Memoised detection: the evaluation pass and the annotation pass may visit
  // the same image, so each path is only analysed once.
  const detect = async (imagePath: string): Promise<BoundingBox[] | null> => {
    if (detectionCache.has(imagePath)) {
      return detectionCache.get(imagePath) ?? null;
    }
    const result = await detector.detectSlider(imagePath, undefined, true);
    detectionCache.set(imagePath, result);
    return result;
  };

  console.log('=== 滑块检测 CLI ===\n');

  const picDirFlag = '--pic-dir=';
  const customArg = process.argv.find((arg) => arg.startsWith(picDirFlag));
  // Keep everything after the flag prefix so that directory names containing
  // '=' are not truncated (splitting on '=' would drop the tail).
  const customDir = customArg ? customArg.slice(picDirFlag.length) : '';
  const hasCustomDir = customDir.length > 0;

  const processDir = hasCustomDir ? path.join(baseDir, customDir) : doubanDir;
  const processDirName = hasCustomDir ? customDir : 'images/douban';
  const useDefaultDataset = !hasCustomDir || processDir === doubanDir;

  if (useDefaultDataset) {
    const groundTruth = loadGroundTruth(groundTruthPath);
    if (groundTruth) {
      await evaluateAgainstGroundTruth({
        doubanDir,
        groundTruth,
        detect,
        validator,
      });
    } else {
      console.log('未找到 ground-truth.json,跳过准确性验证。\n');
    }
  } else {
    console.log(`使用自定义图片目录 ${processDirName},跳过 ground-truth 验证。\n`);
  }

  await ensureDir(outputDir);
  await processDirectory({
    processDir,
    processDirName,
    outputDir,
    detect,
    detector,
  });

  console.log('\n=== 检测完成 ===');
}
|
||||
|
||||
/**
 * Reads and parses the ground-truth annotation file.
 *
 * @param filePath Path of the JSON file to read.
 * @returns The parsed content, or `null` when the file does not exist or
 *          cannot be read/parsed (a warning is logged in the latter case).
 *          The parsed JSON is not validated against the `GroundTruth` shape.
 */
function loadGroundTruth(filePath: string): GroundTruth | null {
  const fileIsPresent = fs.existsSync(filePath);
  if (fileIsPresent) {
    try {
      const rawJson = fs.readFileSync(filePath, 'utf-8');
      const parsed = JSON.parse(rawJson) as GroundTruth;
      return parsed;
    } catch (error) {
      console.warn(`无法解析 ground-truth.json:${error}`);
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Creates `dir`, including any missing parent directories.
 * Because the call is recursive, an already existing directory is not an error.
 */
async function ensureDir(dir: string): Promise<void> {
  const mkdirOptions = { recursive: true };
  await fs.promises.mkdir(dir, mkdirOptions);
}
|
||||
|
||||
/**
 * Compares detector output with the annotated ground truth and prints
 * per-image and overall precision/recall to the console.
 *
 * Ground-truth entries whose image file is missing from `doubanDir` are
 * reported and skipped; they do not contribute to the totals.
 *
 * @param doubanDir   Directory holding the images named by the ground-truth keys.
 * @param groundTruth Map from image file name to the expected rectangles.
 * @param detect      Detection function; a non-array result counts as "no boxes".
 * @param validator   Validator used to match detections against expectations
 *                    (called with a tolerance of 10).
 */
async function evaluateAgainstGroundTruth({
  doubanDir,
  groundTruth,
  detect,
  validator,
}: {
  doubanDir: string;
  groundTruth: GroundTruth;
  detect: (imagePath: string) => Promise<BoundingBox[] | null>;
  validator: SliderValidator;
}): Promise<void> {
  console.log('1. 验证算法准确性(容差:10px)...\n');

  const tolerance = 10;
  const totals = { matched: 0, targets: 0, detected: 0 };
  const asPercent = (ratio: number): string => (ratio * 100).toFixed(1);

  for (const [fileName, expectedBoxes] of Object.entries(groundTruth)) {
    const imagePath = path.join(doubanDir, fileName);
    const imageExists = fs.existsSync(imagePath);
    if (!imageExists) {
      console.log(` 跳过 ${fileName}(原图不存在)`);
      continue;
    }

    const detections = await detect(imagePath);
    const detectedBoxes = Array.isArray(detections) ? detections : [];
    const result = await validator.validateDetection(
      detectedBoxes,
      expectedBoxes,
      tolerance
    );

    console.log(` ${fileName}:`);
    console.log(
      ` 目标 ${result.totalTargets} 个 | 检测 ${result.detectedCount} 个 | 匹配 ${result.matchedCount} 个`
    );
    console.log(
      ` 准确率: ${asPercent(result.precision)}% | 召回率: ${asPercent(result.recall)}%`
    );

    // One line per matched pair, numbered from 1.
    for (let i = 0; i < result.matches.length; i++) {
      const match = result.matches[i];
      console.log(` 匹配 ${i + 1}: IoU=${match.iou.toFixed(3)}`);
    }

    const missed = result.totalTargets - result.matchedCount;
    if (missed > 0) {
      console.log(` ⚠️ 漏检 ${missed} 个滑块`);
    }

    const falsePositives = result.unmatched.length;
    if (falsePositives > 0) {
      console.log(` ⚠️ 误检 ${falsePositives} 个滑块`);
    }

    console.log('');

    totals.matched += result.matchedCount;
    totals.targets += result.totalTargets;
    totals.detected += result.detectedCount;
  }

  if (totals.targets <= 0) {
    console.log(' ground-truth.json 中没有记录可供验证。\n');
    return;
  }

  // Precision is reported as 0 when nothing was detected at all.
  const overallPrecision =
    totals.detected > 0 ? (totals.matched / totals.detected) * 100 : 0;
  const overallRecall = (totals.matched / totals.targets) * 100;

  console.log('总体统计:');
  console.log(` 总目标数: ${totals.targets}`);
  console.log(` 总检测数: ${totals.detected}`);
  console.log(` 匹配成功: ${totals.matched}`);
  console.log(
    ` 总体准确率: ${overallPrecision.toFixed(1)}% | 总体召回率: ${overallRecall.toFixed(1)}%\n`
  );
}
|
||||
|
||||
/**
 * Runs detection on every `.png` file (case-insensitive extension) directly
 * inside `processDir` and writes an annotated copy with the same file name to
 * `outputDir` for each image in which at least one slider was found.
 *
 * @param processDir     Directory to scan.
 * @param processDirName Display name of the directory, used only in log output.
 * @param outputDir      Destination directory for annotated images.
 * @param detect         Detection function (may be cached by the caller).
 * @param detector       Detector whose `annotate` method draws the boxes.
 */
async function processDirectory({
  processDir,
  processDirName,
  outputDir,
  detect,
  detector,
}: {
  processDir: string;
  processDirName: string;
  outputDir: string;
  detect: (imagePath: string) => Promise<BoundingBox[] | null>;
  detector: SliderDetector;
}): Promise<void> {
  console.log(`2. 处理 ${processDirName} 目录下的滑块图片...\n`);

  if (!fs.existsSync(processDir)) {
    console.log(` 错误:找不到目录 ${processDir}`);
    return;
  }

  const isPng = (name: string): boolean => name.toLowerCase().endsWith('.png');
  const files = fs.readdirSync(processDir).filter(isPng);

  if (files.length === 0) {
    console.log(' 没有找到需要处理的图片。');
    return;
  }

  const describeBox = (box: BoundingBox, index: number): string =>
    `#${index + 1}[x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]`;

  let processed = 0;
  for (const file of files) {
    const inputPath = path.join(processDir, file);
    const detections = await detect(inputPath);

    if (!detections || detections.length === 0) {
      console.log(` ❌ ${file}: 未检测到滑块`);
      continue;
    }

    const outputPath = path.join(outputDir, file);
    await detector.annotate(inputPath, detections, outputPath);

    const boxSummary = detections.map(describeBox).join(', ');
    console.log(` ✅ ${file}: 检测到 ${detections.length} 个滑块 ${boxSummary}`);
    processed += 1;
  }

  console.log(
    `\n 处理完成: ${processed}/${files.length} 张图片,结果输出到 ${outputDir}`
  );
}
|
||||
|
||||
// Start the CLI. Any error escaping main() is printed and turned into a
// non-zero exit code; the process is left to exit on its own.
const reportFatalError = (error: unknown): void => {
  console.error(error);
  process.exitCode = 1;
};

main().catch(reportFatalError);
|
||||
705
src/slider/detection/candidate-search.ts
Normal file
@@ -0,0 +1,705 @@
|
||||
import sharp from 'sharp';
|
||||
import { BoundingBox, RawImage } from '../types';
|
||||
import { calculateIoU } from '../utils/geometry';
|
||||
import { createEdgeMap, morphologyClose, dilate, toGrayscale } from '../utils/image';
|
||||
|
||||
/** Inputs consumed by `findCandidateBoxes`. */
interface CandidateSearchInput {
  /**
   * Raw pixels used for LAB-difference detection, the edge map, hue
   * consistency and border-gradient scoring.
   */
  original: RawImage;
  /**
   * Raw pixels used for dark/bright-region and edge detection; its
   * `width`, `height` and `channels` are the dimensions used by every strategy.
   */
  normalized: RawImage;
  /** sharp pipeline that is cloned and smoothed for the color-quantization strategy. */
  quantizationSource: sharp.Sharp;
}
|
||||
|
||||
/**
 * Runs all detection strategies, merges their results and returns the
 * surviving candidates ordered by descending score.
 *
 * Steps:
 *  1. Collect boxes from the dark/bright-region, edge, color-quantization and
 *     LAB-difference strategies.
 *  2. De-duplicate greedily: boxes are visited by descending fill density
 *     (`score / area`) and dropped when they overlap an already kept box with
 *     IoU > 0.5.
 *  3. Re-score each remaining box with `scoreCandidate`.
 *  4. Keep only roughly square boxes of 60–120 px per side that stay clear of
 *     a 5 % margin along every image border.
 */
export async function findCandidateBoxes({
  original,
  normalized,
  quantizationSource,
}: CandidateSearchInput): Promise<BoundingBox[]> {
  const { width, height, channels } = normalized;

  const mixedBoxes = detectDarkRegions(normalized.data, width, height, channels);
  const edgeBoxes = detectByEdges(normalized.data, width, height, channels);
  const quantizedBoxes = await detectByColorQuantization(
    quantizationSource,
    width,
    height,
    channels
  );
  const labBoxes = detectByLabColor(original.data, width, height, channels);

  // Greedy de-duplication, densest boxes first.
  const fillDensity = (box: BoundingBox): number =>
    box.score / (box.width * box.height);
  const byDensityDesc = [
    ...mixedBoxes,
    ...edgeBoxes,
    ...quantizedBoxes,
    ...labBoxes,
  ].sort((first, second) => fillDensity(second) - fillDensity(first));

  const uniqueBoxes: BoundingBox[] = [];
  for (const box of byDensityDesc) {
    const isDuplicate = uniqueBoxes.some((kept) => calculateIoU(kept, box) > 0.5);
    if (!isDuplicate) {
      uniqueBoxes.push(box);
    }
  }

  const edgeMap = createEdgeMap(original);

  const marginX = width * 0.05;
  const marginY = height * 0.05;

  const hasPlausibleGeometry = (box: BoundingBox): boolean => {
    const aspectRatio = box.width / box.height;
    const isNotOnEdge =
      box.x > marginX &&
      box.y > marginY &&
      box.x + box.width < width - marginX &&
      box.y + box.height < height - marginY;

    return (
      box.width >= 60 &&
      box.width <= 120 &&
      box.height >= 60 &&
      box.height <= 120 &&
      aspectRatio >= 0.7 &&
      aspectRatio <= 1.3 &&
      isNotOnEdge
    );
  };

  return uniqueBoxes
    .map((box) => scoreCandidate(box, original, normalized, edgeMap))
    .filter(hasPlausibleGeometry)
    .sort((first, second) => second.score - first.score);
}
|
||||
|
||||
/**
 * Computes the final ranking score of a candidate box.
 *
 * The score starts from the box's fill density (`score / area`) and receives
 * bonuses for: a near-square shape (+0.5), consistent hue (+0.8), low internal
 * edge density (+0.8 below 0.15 and a further +0.6 below 0.1), plus twice the
 * border gradient score.
 *
 * @returns A copy of `box` with `score` replaced by the computed value.
 */
function scoreCandidate(
  box: BoundingBox,
  original: RawImage,
  normalized: RawImage,
  edgeMap: Uint8Array
): BoundingBox {
  const aspectRatio = box.width / box.height;
  const nearlySquare = aspectRatio >= 0.85 && aspectRatio <= 1.18;
  const hueIsConsistent = verifyHueConsistency(original, box);
  const edgeDensity = calculateInternalEdgeDensity(
    edgeMap,
    normalized.width,
    box
  );
  const gradientScore = calculateEdgeGradientScore(original, box);

  // Bonuses are added in a fixed order; a missing bonus contributes 0.
  let score = box.score / (box.width * box.height);
  score += nearlySquare ? 0.5 : 0;
  score += hueIsConsistent ? 0.8 : 0;
  score += edgeDensity < 0.15 ? 0.8 : 0;
  score += edgeDensity < 0.1 ? 0.6 : 0;
  score += gradientScore * 2.0;

  return { ...box, score };
}
|
||||
|
||||
/**
 * Checks whether the colored pixels inside a box share roughly the same hue.
 *
 * Only the interior of the box (5 px inset on every side) is sampled. A pixel
 * counts as "colored" when its HSL saturation is above 0.15 and its lightness
 * lies strictly between 0.1 and 0.9.
 *
 * Hue is an angle, so the spread is measured circularly: deviations are taken
 * from the circular mean and wrapped into [-180°, 180°]. A purely linear
 * standard deviation would report a tight cluster around 180° (cyan) as
 * wildly inconsistent, because such hues straddle the ±180° cut.
 *
 * @param image Raw image; the first three channels are read as R, G, B.
 * @param box   Candidate box in image pixel coordinates.
 * @returns `true` when the box is too small to sample, when fewer than 20 % of
 *          the interior pixels are colored, or when the hue deviation is
 *          below 25°; `false` otherwise.
 */
function verifyHueConsistency(image: RawImage, box: BoundingBox): boolean {
  const inset = 5;
  const startY = box.y + inset;
  const endY = box.y + box.height - inset;
  const startX = box.x + inset;
  const endX = box.x + box.width - inset;

  // Nothing left to sample once the inset is removed.
  if (endY <= startY || endX <= startX) return true;

  const { data, width, channels } = image;
  const hueValues: number[] = [];

  for (let y = startY; y < endY; y++) {
    for (let x = startX; x < endX; x++) {
      const idx = (y * width + x) * channels;
      const r = data[idx] / 255;
      const g = data[idx + 1] / 255;
      const b = data[idx + 2] / 255;

      // RGB -> HSL; h ends up in [0, 1).
      const max = Math.max(r, g, b);
      const min = Math.min(r, g, b);
      const l = (max + min) / 2;
      let h = 0;
      let s = 0;

      if (max !== min) {
        const d = max - min;
        s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
        switch (max) {
          case r:
            h = (g - b) / d + (g < b ? 6 : 0);
            break;
          case g:
            h = (b - r) / d + 2;
            break;
          case b:
            h = (r - g) / d + 4;
            break;
        }
        h /= 6;
      }

      if (s > 0.15 && l > 0.1 && l < 0.9) {
        hueValues.push(h * 360);
      }
    }
  }

  // Mostly gray/black/white content: hue carries no information, accept it.
  const internalArea = (box.width - 2 * inset) * (box.height - 2 * inset);
  if (hueValues.length < internalArea * 0.2) {
    return true;
  }

  // Circular mean: average the unit vectors of all hue angles.
  const degToRad = Math.PI / 180;
  let sumSin = 0;
  let sumCos = 0;
  for (const hue of hueValues) {
    sumSin += Math.sin(hue * degToRad);
    sumCos += Math.cos(hue * degToRad);
  }
  const meanHue = Math.atan2(sumSin, sumCos) / degToRad;

  // Root-mean-square angular deviation, each difference wrapped to [-180, 180].
  let sumSquares = 0;
  for (const hue of hueValues) {
    let delta = (hue - meanHue) % 360;
    if (delta > 180) {
      delta -= 360;
    } else if (delta < -180) {
      delta += 360;
    }
    sumSquares += delta * delta;
  }
  const stdDevHue = Math.sqrt(sumSquares / hueValues.length);

  return stdDevHue < 25;
}
|
||||
|
||||
/**
 * Fraction of edge pixels inside a box, ignoring a 5 px border on every side.
 *
 * @param edgeMap Row-major map in which the value 1 marks an edge pixel.
 * @param width   Row stride of `edgeMap` in pixels.
 * @param box     Box to inspect.
 * @returns Edge pixels divided by the interior area, or 0 when the box has no
 *          interior left after shrinking.
 */
function calculateInternalEdgeDensity(
  edgeMap: Uint8Array,
  width: number,
  box: BoundingBox
): number {
  const shrink = 5;
  const left = box.x + shrink;
  const top = box.y + shrink;
  const right = box.x + box.width - shrink;
  const bottom = box.y + box.height - shrink;

  if (right <= left || bottom <= top) {
    return 0;
  }

  let edgeCount = 0;
  for (let row = top; row < bottom; row++) {
    const rowOffset = row * width;
    for (let col = left; col < right; col++) {
      if (edgeMap[rowOffset + col] === 1) {
        edgeCount += 1;
      }
    }
  }

  const interiorArea = (right - left) * (bottom - top);
  if (interiorArea === 0) {
    return 0;
  }
  return edgeCount / interiorArea;
}
|
||||
|
||||
/**
 * Scores how uniform the brightness is just outside a box.
 *
 * Four straight lines are sampled 5 px outside the top, bottom, left and right
 * sides of the box. Along each line the absolute brightness change between
 * consecutive in-image samples is recorded; the score is
 * `exp(-variance / 100)` of those changes, so an even surrounding scores
 * close to 1.
 *
 * @param image Raw image; the first three channels are read as R, G, B.
 * @param box   Candidate box in image pixel coordinates.
 * @returns A value in (0, 1], or the neutral value 0.5 when fewer than 20
 *          brightness changes could be collected.
 */
function calculateEdgeGradientScore(image: RawImage, box: BoundingBox): number {
  const band = 5;
  const { data, width, height, channels } = image;
  const gradients: number[] = [];

  const left = box.x;
  const top = box.y;
  const right = box.x + box.width;
  const bottom = box.y + box.height;

  // [x1, y1, x2, y2] for the lines above, below, left of and right of the box.
  const segments: Array<[number, number, number, number]> = [
    [left, top - band, right, top - band],
    [left, bottom + band, right, bottom + band],
    [left - band, top, left - band, bottom],
    [right + band, top, right + band, bottom],
  ];

  for (const [x1, y1, x2, y2] of segments) {
    const dx = x2 - x1;
    const dy = y2 - y1;
    const steps = Math.max(Math.abs(dx), Math.abs(dy));
    if (steps === 0) continue;

    // Brightness of the previous in-image sample on this line, if any.
    let previous: number | undefined;

    for (let i = 0; i <= steps; i++) {
      const x = Math.round(x1 + (dx * i) / steps);
      const y = Math.round(y1 + (dy * i) / steps);

      const insideImage = x >= 0 && x < width && y >= 0 && y < height;
      if (!insideImage) continue;

      const idx = (y * width + x) * channels;
      const brightness =
        data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114;

      if (previous !== undefined) {
        gradients.push(Math.abs(brightness - previous));
      }
      previous = brightness;
    }
  }

  if (gradients.length < 20) {
    return 0.5;
  }

  let total = 0;
  for (const gradient of gradients) {
    total = total + gradient;
  }
  const mean = total / gradients.length;

  let squaredDeviation = 0;
  for (const gradient of gradients) {
    squaredDeviation = squaredDeviation + Math.pow(gradient - mean, 2);
  }
  const variance = squaredDeviation / gradients.length;

  return Math.exp(-variance / 100);
}
|
||||
|
||||
/**
 * Finds candidate boxes by thresholding pixel brightness.
 *
 * Two families of binary masks are built from the luminance
 * (0.299 R + 0.587 G + 0.114 B):
 *  - "bright" masks (brightness above 130/160/190/220), dilated with size 5;
 *  - "dark" masks (brightness below 40/60/80/100/120), closed with size 3.
 * Connected regions of every mask are filtered by `selectBestRegions`, and the
 * combined result is de-duplicated greedily by descending score (IoU > 0.4
 * counts as a duplicate).
 *
 * The luminance of each pixel is computed once and reused for all nine
 * thresholds instead of being recomputed per threshold.
 *
 * @param data     Interleaved pixel data; the first three channels are R, G, B.
 * @param width    Image width in pixels.
 * @param height   Image height in pixels.
 * @param channels Number of interleaved channels per pixel.
 */
function detectDarkRegions(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const pixelCount = width * height;

  // Loop-invariant work hoisted out of the threshold loops. Pixels missing
  // from `data` yield NaN, which fails every comparison below and therefore
  // stays 0 in all masks.
  const brightness = new Float64Array(pixelCount);
  for (let p = 0; p < pixelCount; p++) {
    const i = p * channels;
    brightness[p] = data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114;
  }

  const allCandidates: BoundingBox[] = [];

  for (const brightThreshold of [130, 160, 190, 220]) {
    const whiteMap = new Uint8Array(pixelCount);
    for (let p = 0; p < pixelCount; p++) {
      whiteMap[p] = brightness[p] > brightThreshold ? 1 : 0;
    }
    const dilatedMap = dilate(whiteMap, width, height, 5);
    const regions = findDarkRegionsList(dilatedMap, width, height);
    allCandidates.push(...selectBestRegions(regions, width, height, true));
  }

  for (const darkThreshold of [40, 60, 80, 100, 120]) {
    const darkMap = new Uint8Array(pixelCount);
    for (let p = 0; p < pixelCount; p++) {
      darkMap[p] = brightness[p] < darkThreshold ? 1 : 0;
    }
    const cleaned = morphologyClose(darkMap, width, height, 3);
    const regions = findDarkRegionsList(cleaned, width, height);
    allCandidates.push(...selectBestRegions(regions, width, height, true));
  }

  if (allCandidates.length === 0) return [];

  // Greedy de-duplication: highest score first, drop overlapping followers.
  const uniqueCandidates: BoundingBox[] = [];
  allCandidates
    .sort((a, b) => b.score - a.score)
    .forEach((candidate) => {
      if (!uniqueCandidates.some((s) => calculateIoU(s, candidate) > 0.4)) {
        uniqueCandidates.push(candidate);
      }
    });

  return uniqueCandidates;
}
|
||||
|
||||
/**
 * Extracts the bounding boxes of all connected regions of 1-pixels in a
 * binary mask, scanning row by row.
 *
 * @param binary Row-major mask; the value 1 marks a foreground pixel.
 * @param width  Mask width in pixels.
 * @param height Mask height in pixels.
 * @returns Regions that are at least 20 px wide and 20 px high.
 */
function findDarkRegionsList(
  binary: Uint8Array,
  width: number,
  height: number
): BoundingBox[] {
  const minSide = 20;
  const seen = new Uint8Array(width * height);
  const found: BoundingBox[] = [];

  for (let row = 0; row < height; row++) {
    const rowOffset = row * width;
    for (let col = 0; col < width; col++) {
      const pixel = rowOffset + col;
      const isUnvisitedForeground = seen[pixel] === 0 && binary[pixel] === 1;
      if (!isUnvisitedForeground) continue;

      // floodFill marks every pixel of the region in `seen`.
      const region = floodFill(binary, seen, col, row, width, height);
      if (region.width >= minSide && region.height >= minSide) {
        found.push(region);
      }
    }
  }

  return found;
}
|
||||
|
||||
/**
 * Picks the most slider-like regions from a list of connected regions.
 *
 * A region qualifies when it is smaller than half the image in both
 * dimensions, 70–110 px per side, nearly square (aspect ratio 0.85–1.18, side
 * difference at most 20 px) and vertically centred between 10 % and 80 % of
 * the image height. Qualifying regions are ranked by
 * `3 * fillDensity - |aspectRatio - 1|` (higher first) and then selected
 * greedily, skipping any region whose IoU with an already selected one
 * exceeds 0.3.
 *
 * @param regions        Regions to choose from; `score` is used as the pixel count.
 * @param imageWidth     Image width in pixels.
 * @param imageHeight    Image height in pixels.
 * @param selectMultiple When true up to 3 regions are returned, otherwise at most 1.
 */
function selectBestRegions(
  regions: BoundingBox[],
  imageWidth: number,
  imageHeight: number,
  selectMultiple: boolean = false
): BoundingBox[] {
  if (regions.length === 0) return [];

  const looksLikeSlider = (region: BoundingBox): boolean => {
    const fitsInImage =
      region.width < imageWidth * 0.5 && region.height < imageHeight * 0.5;
    if (!fitsInImage) return false;

    const aspectRatio = region.width / region.height;
    const centerY = region.y + region.height / 2;
    const sizeDiff = Math.abs(region.width - region.height);

    return (
      region.width >= 70 &&
      region.width <= 110 &&
      region.height >= 70 &&
      region.height <= 110 &&
      aspectRatio >= 0.85 &&
      aspectRatio <= 1.18 &&
      sizeDiff <= 20 &&
      centerY > imageHeight * 0.1 &&
      centerY < imageHeight * 0.8
    );
  };

  const candidates = regions.filter(looksLikeSlider);
  if (candidates.length === 0) return [];

  // Higher is better: dense fill is rewarded, deviation from a square is penalised.
  const rank = (region: BoundingBox): number => {
    const density = region.score / (region.width * region.height);
    const aspectPenalty = Math.abs(region.width / region.height - 1);
    return density * 3 - aspectPenalty;
  };
  candidates.sort((first, second) => rank(second) - rank(first));

  const limit = selectMultiple ? 3 : 1;
  const selected: BoundingBox[] = [];
  for (const candidate of candidates) {
    const overlapsSelected = selected.some(
      (kept) => calculateIoU(kept, candidate) > 0.3
    );
    if (overlapsSelected) continue;

    selected.push(candidate);
    if (selected.length >= limit) break;
  }

  return selected;
}
|
||||
|
||||
/**
 * Finds candidate boxes from strong edges.
 *
 * A 3x3 Sobel operator is applied to the grayscale image (the outermost pixel
 * frame is skipped). Pixels whose gradient magnitude exceeds 40 form a binary
 * edge mask, which is dilated with size 4 so that outlines merge into solid
 * regions before connected regions are extracted and filtered.
 *
 * @param data     Interleaved pixel data passed to `toGrayscale`.
 * @param width    Image width in pixels.
 * @param height   Image height in pixels.
 * @param channels Number of interleaved channels per pixel.
 */
function detectByEdges(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const gray = toGrayscale(data, width, height, channels);
  const edges = new Uint8Array(width * height);
  const magnitudeThreshold = 40;

  for (let y = 1; y < height - 1; y++) {
    const above = (y - 1) * width;
    const current = y * width;
    const below = (y + 1) * width;

    for (let x = 1; x < width - 1; x++) {
      // 3x3 neighbourhood around (x, y); the centre itself is not needed.
      const topLeft = gray[above + (x - 1)];
      const topCenter = gray[above + x];
      const topRight = gray[above + (x + 1)];
      const midLeft = gray[current + x - 1];
      const midRight = gray[current + x + 1];
      const bottomLeft = gray[below + (x - 1)];
      const bottomCenter = gray[below + x];
      const bottomRight = gray[below + (x + 1)];

      const gx =
        -topLeft +
        topRight -
        2 * midLeft +
        2 * midRight -
        bottomLeft +
        bottomRight;

      const gy =
        -topLeft -
        2 * topCenter -
        topRight +
        bottomLeft +
        2 * bottomCenter +
        bottomRight;

      const magnitude = Math.sqrt(gx * gx + gy * gy);
      edges[current + x] = magnitude > magnitudeThreshold ? 1 : 0;
    }
  }

  const thickened = dilate(edges, width, height, 4);
  const regions = findDarkRegionsList(thickened, width, height);
  return selectBestRegions(regions, width, height, true);
}
|
||||
|
||||
/**
 * Finds candidate boxes by snapping colors to a small fixed palette.
 *
 * The image is median-filtered (size 3), every pixel is replaced by the
 * nearest of seven gray levels (squared RGB distance, first match wins on
 * ties), and 4-connected regions of identical quantized color are extracted.
 * Regions of 40–140 px per side with an aspect ratio of 0.7–1.4 are then
 * passed to `selectBestRegions`.
 *
 * @param image    sharp pipeline; it is cloned, so the caller's instance is
 *                 not consumed.
 * @param width    Width used to index the decoded buffer.
 * @param height   Height used to index the decoded buffer.
 * @param channels Fallback channel count when sharp does not report one.
 * @returns Selected regions, or an empty array when decoding or processing
 *          fails (the error is logged).
 */
async function detectByColorQuantization(
  image: sharp.Sharp,
  width: number,
  height: number,
  channels: number
): Promise<BoundingBox[]> {
  try {
    const { data: smoothData, info } = await image
      .clone()
      .median(3)
      .ensureAlpha()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const channelCount = info.channels ?? channels;
    // Work on a copy so the decoded buffer itself is left untouched.
    const quantized = Buffer.from(smoothData);

    const palette = [
      [240, 240, 240],
      [200, 200, 200],
      [150, 150, 150],
      [100, 100, 100],
      [60, 60, 60],
      [30, 30, 30],
      [0, 0, 0],
    ];

    for (let offset = 0; offset < quantized.length; offset += channelCount) {
      const r = quantized[offset];
      const g = quantized[offset + 1];
      const b = quantized[offset + 2];

      let bestIndex = 0;
      let bestDistance = Infinity;
      palette.forEach(([pr, pg, pb], paletteIndex) => {
        const distance =
          Math.pow(r - pr, 2) + Math.pow(g - pg, 2) + Math.pow(b - pb, 2);
        if (distance < bestDistance) {
          bestDistance = distance;
          bestIndex = paletteIndex;
        }
      });

      const [qr, qg, qb] = palette[bestIndex];
      quantized[offset] = qr;
      quantized[offset + 1] = qg;
      quantized[offset + 2] = qb;
    }

    const hasPlausibleSize = (region: BoundingBox): boolean => {
      const sidesInRange =
        region.width >= 40 &&
        region.width <= 140 &&
        region.height >= 40 &&
        region.height <= 140;
      if (!sidesInRange) return false;
      const aspectRatio = region.width / region.height;
      return aspectRatio >= 0.7 && aspectRatio <= 1.4;
    };

    const visited = new Uint8Array(width * height);
    const regions: BoundingBox[] = [];

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        if (visited[y * width + x] !== 0) continue;

        const region = floodFillOnQuantized(
          quantized,
          visited,
          x,
          y,
          width,
          height,
          channelCount
        );
        if (hasPlausibleSize(region)) {
          regions.push(region);
        }
      }
    }

    return selectBestRegions(regions, width, height, true);
  } catch (error) {
    console.error('[Quantization] Failed to quantize image:', error);
    return [];
  }
}
|
||||
|
||||
/**
 * Finds candidate boxes from local color differences in CIE L*a*b* space.
 *
 * Every pixel is converted to Lab. A pixel is marked when the largest
 * Euclidean Lab distance to any of its eight neighbours at a distance of 8 px
 * (horizontal, vertical and diagonal) exceeds 12. Pixels closer than 8 px to
 * the image border are never marked. The mask is closed with size 5 before
 * connected regions are extracted and filtered.
 *
 * @param data     Interleaved pixel data; the first three channels are R, G, B.
 * @param width    Image width in pixels.
 * @param height   Image height in pixels.
 * @param channels Number of interleaved channels per pixel.
 */
function detectByLabColor(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): BoundingBox[] {
  const pixelCount = width * height;

  // Lab values stored as consecutive [L, a, b] triples.
  const labMap = new Float32Array(pixelCount * 3);
  for (let pixel = 0; pixel < pixelCount; pixel++) {
    const source = pixel * channels;
    const [l, a, b] = rgbToLab(data[source], data[source + 1], data[source + 2]);
    const target = pixel * 3;
    labMap[target] = l;
    labMap[target + 1] = a;
    labMap[target + 2] = b;
  }

  const neighborhood = 8;
  const differenceThreshold = 12;
  const steps = [-neighborhood, 0, neighborhood];
  const diffMap = new Uint8Array(pixelCount);

  for (let y = neighborhood; y < height - neighborhood; y++) {
    for (let x = neighborhood; x < width - neighborhood; x++) {
      const centerIdx = y * width + x;
      const center = centerIdx * 3;
      let maxDiff = 0;

      for (const ny of steps) {
        for (const nx of steps) {
          if (nx === 0 && ny === 0) continue;

          const neighbor = ((y + ny) * width + (x + nx)) * 3;
          const deltaE = Math.sqrt(
            Math.pow(labMap[center] - labMap[neighbor], 2) +
              Math.pow(labMap[center + 1] - labMap[neighbor + 1], 2) +
              Math.pow(labMap[center + 2] - labMap[neighbor + 2], 2)
          );
          if (deltaE > maxDiff) {
            maxDiff = deltaE;
          }
        }
      }

      if (maxDiff > differenceThreshold) {
        diffMap[centerIdx] = 1;
      }
    }
  }

  const cleaned = morphologyClose(diffMap, width, height, 5);
  const regions = findDarkRegionsList(cleaned, width, height);
  return selectBestRegions(regions, width, height, true);
}
|
||||
|
||||
/**
 * Converts an 8-bit sRGB color to CIE L*a*b*.
 *
 * The channels are linearised with the sRGB transfer function, converted to
 * XYZ, normalised by the white point (0.95047, 1.0, 1.08883) and mapped
 * through the Lab companding curve.
 *
 * @param r Red channel, 0–255.
 * @param g Green channel, 0–255.
 * @param b Blue channel, 0–255.
 * @returns `[L, a, b]`.
 */
function rgbToLab(r: number, g: number, b: number): [number, number, number] {
  // sRGB channel (0-255) -> linear-light value.
  const linearize = (channel: number): number => {
    const scaled = channel / 255;
    return scaled > 0.04045
      ? Math.pow((scaled + 0.055) / 1.055, 2.4)
      : scaled / 12.92;
  };

  // Lab companding: cube root above the threshold, linear segment below it.
  const compand = (t: number): number =>
    t > 0.008856 ? Math.pow(t, 1 / 3) : 7.787 * t + 16 / 116;

  const R = linearize(r);
  const G = linearize(g);
  const B = linearize(b);

  const X = R * 0.4124 + G * 0.3576 + B * 0.1805;
  const Y = R * 0.2126 + G * 0.7152 + B * 0.0722;
  const Z = R * 0.0193 + G * 0.1192 + B * 0.9505;

  const fx = compand(X / 0.95047);
  const fy = compand(Y / 1.0);
  const fz = compand(Z / 1.08883);

  return [116 * fy - 16, 500 * (fx - fy), 200 * (fy - fz)];
}
|
||||
|
||||
/**
 * Flood-fills the 4-connected region of pixels whose first three channels
 * exactly match the start pixel, and returns its bounding box.
 *
 * Pixels are marked in `visited` when they are queued, so each pixel is
 * processed at most once. The start pixel is always part of the region.
 *
 * @param data     Interleaved pixel data.
 * @param visited  Row-major visit map; updated in place.
 * @param startX   X coordinate of the start pixel (expected to be inside the image).
 * @param startY   Y coordinate of the start pixel (expected to be inside the image).
 * @param width    Image width in pixels.
 * @param height   Image height in pixels.
 * @param channels Number of interleaved channels per pixel.
 * @returns Bounding box of the region; `score` is its pixel count.
 */
function floodFillOnQuantized(
  data: Buffer,
  visited: Uint8Array,
  startX: number,
  startY: number,
  width: number,
  height: number,
  channels: number
): BoundingBox {
  const seedOffset = (startY * width + startX) * channels;
  const seedR = data[seedOffset];
  const seedG = data[seedOffset + 1];
  const seedB = data[seedOffset + 2];

  const matchesSeed = (pixelIndex: number): boolean => {
    const offset = pixelIndex * channels;
    return (
      data[offset] === seedR &&
      data[offset + 1] === seedG &&
      data[offset + 2] === seedB
    );
  };

  const directions: Array<[number, number]> = [
    [1, 0],
    [-1, 0],
    [0, 1],
    [0, -1],
  ];

  let left = startX;
  let top = startY;
  let right = startX;
  let bottom = startY;
  let filled = 0;

  const pending: Array<[number, number]> = [[startX, startY]];
  visited[startY * width + startX] = 1;

  while (pending.length > 0) {
    const [x, y] = pending.pop()!;
    filled += 1;

    if (x < left) left = x;
    if (y < top) top = y;
    if (x > right) right = x;
    if (y > bottom) bottom = y;

    for (const [dx, dy] of directions) {
      const nx = x + dx;
      const ny = y + dy;
      const insideImage = nx >= 0 && nx < width && ny >= 0 && ny < height;
      if (!insideImage) continue;

      const neighborIndex = ny * width + nx;
      if (visited[neighborIndex] !== 0) continue;

      if (matchesSeed(neighborIndex)) {
        visited[neighborIndex] = 1;
        pending.push([nx, ny]);
      }
    }
  }

  return {
    x: left,
    y: top,
    width: right - left + 1,
    height: bottom - top + 1,
    score: filled,
  };
}
|
||||
|
||||
/**
 * Flood-fills the 4-connected region of 1-pixels that contains the start
 * pixel and returns its bounding box.
 *
 * Coordinates are validated when they are taken off the work list, so
 * out-of-image, already visited and background pixels are simply skipped.
 * If the start pixel itself is skipped, a 1x1 box at the start position with
 * `score` 0 is returned.
 *
 * @param binary  Row-major mask; the value 0 is background.
 * @param visited Row-major visit map; updated in place.
 * @param startX  X coordinate of the start pixel.
 * @param startY  Y coordinate of the start pixel.
 * @param width   Mask width in pixels.
 * @param height  Mask height in pixels.
 * @returns Bounding box of the region; `score` is its pixel count.
 */
function floodFill(
  binary: Uint8Array,
  visited: Uint8Array,
  startX: number,
  startY: number,
  width: number,
  height: number
): BoundingBox {
  let left = startX;
  let top = startY;
  let right = startX;
  let bottom = startY;
  let filled = 0;

  // Parallel coordinate stacks; both always have the same length.
  const pendingX: number[] = [startX];
  const pendingY: number[] = [startY];

  while (pendingX.length > 0) {
    const x = pendingX.pop()!;
    const y = pendingY.pop()!;

    const insideImage = x >= 0 && x < width && y >= 0 && y < height;
    if (!insideImage) continue;

    const pixel = y * width + x;
    const alreadyHandled = visited[pixel] === 1;
    if (alreadyHandled || binary[pixel] === 0) continue;

    visited[pixel] = 1;
    filled += 1;

    if (x < left) left = x;
    if (y < top) top = y;
    if (x > right) right = x;
    if (y > bottom) bottom = y;

    pendingX.push(x + 1, x - 1, x, x);
    pendingY.push(y, y, y + 1, y - 1);
  }

  return {
    x: left,
    y: top,
    width: right - left + 1,
    height: bottom - top + 1,
    score: filled,
  };
}
|
||||
152
src/slider/detector-self-learning.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
import sharp from 'sharp';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { BoundingBox } from './types';
|
||||
|
||||
/**
 * Slides `template` over `image` and returns the top-left position with the
 * highest normalised cross-correlation, computed on the first channel of each
 * raw buffer.
 *
 * @param image      Image to search in.
 * @param template   Patch to look for.
 * @param searchArea Region of candidate top-left positions; its right/bottom
 *                   edges are exclusive and it is clamped to positions where
 *                   the template fits entirely inside the image.
 * @param excludeBox Optional box to avoid: positions where more than half of
 *                   the template area overlaps this box are skipped.
 * @returns The best score and its location. When no position could be
 *          evaluated, `maxVal` is `-Infinity` and `maxLoc` is `{ x: 0, y: 0 }`.
 * @throws Error when either buffer reports a zero or missing dimension.
 */
async function matchTemplate(
  image: sharp.Sharp,
  template: sharp.Sharp,
  searchArea: { x: number; y: number; width: number; height: number },
  excludeBox?: BoundingBox
): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> {
  const { data: imageBuffer, info: imageInfo } = await image
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { data: templateBuffer, info: templateInfo } = await template
    .raw()
    .toBuffer({ resolveWithObject: true });

  const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo;
  const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo;

  if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) {
    throw new Error('Image or template dimensions are invalid.');
  }

  // The template does not change between positions, so read its samples and
  // accumulate its energy once instead of once per candidate position.
  const templateArea = templateWidth * templateHeight;
  const templateValues = new Float64Array(templateArea);
  let templateEnergy = 0;
  for (let i = 0; i < templateArea; i++) {
    const value = templateBuffer[i * templateChannels];
    templateValues[i] = value;
    templateEnergy += value * value;
  }

  let maxVal = -Infinity;
  let maxLoc = { x: 0, y: 0 };

  // `imageSize - templateSize` is itself a valid top-left position, hence the
  // `+ 1` on the exclusive upper bounds. Without it the last row/column was
  // never scored, and nothing was scored when the template spanned the image.
  const startY = Math.max(0, searchArea.y);
  const endY = Math.min(imageHeight - templateHeight + 1, searchArea.y + searchArea.height);
  const startX = Math.max(0, searchArea.x);
  const endX = Math.min(imageWidth - templateWidth + 1, searchArea.x + searchArea.width);

  for (let y = startY; y < endY; y++) {
    for (let x = startX; x < endX; x++) {
      // Exclude the original box area from matching by checking for significant overlap
      if (excludeBox) {
        const overlapX = Math.max(
          0,
          Math.min(x + templateWidth, excludeBox.x + excludeBox.width) - Math.max(x, excludeBox.x)
        );
        const overlapY = Math.max(
          0,
          Math.min(y + templateHeight, excludeBox.y + excludeBox.height) - Math.max(y, excludeBox.y)
        );
        if ((overlapX * overlapY) / templateArea > 0.5) {
          continue;
        }
      }

      let crossSum = 0;
      let imageEnergy = 0;

      for (let ty = 0; ty < templateHeight; ty++) {
        const imageRowBase = (y + ty) * imageWidth + x;
        const templateRowBase = ty * templateWidth;
        for (let tx = 0; tx < templateWidth; tx++) {
          const imageVal = imageBuffer[(imageRowBase + tx) * imageChannels];
          crossSum += imageVal * templateValues[templateRowBase + tx];
          imageEnergy += imageVal * imageVal;
        }
      }

      // A completely black window or template has no defined correlation; score it 0.
      const denominator = Math.sqrt(templateEnergy * imageEnergy);
      const val = denominator === 0 ? 0 : crossSum / denominator;

      if (val > maxVal) {
        maxVal = val;
        maxLoc = { x, y };
      }
    }
  }

  return { maxVal, maxLoc };
}
|
||||
|
||||
/**
 * Looks for a second slider shape in a captcha image by using an already
 * detected box as a template and matching it against the image's edge map.
 */
export class SelfLearningSliderDetector {
  /**
   * Builds a binary edge image (0 or 255, single channel) by thresholding the
   * Sobel gradient magnitude of the greyscale input at 50. Despite the name,
   * no Canny non-maximum suppression or hysteresis is performed. The one-pixel
   * border is left at 0.
   *
   * Note: `grayscale()` is applied to the instance that is passed in.
   */
  private async cannyEdge(image: sharp.Sharp): Promise<sharp.Sharp> {
    const { data, info } = await image
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;
    const edges = Buffer.alloc(width * height);

    // Step by the reported channel count so that an extra channel in the raw
    // output (e.g. alpha) does not shift every pixel lookup.
    const lum = (col: number, row: number): number => data[(row * width + col) * channels];

    for (let y = 1; y < height - 1; y++) {
      for (let x = 1; x < width - 1; x++) {
        const gx =
          -lum(x - 1, y - 1) -
          2 * lum(x - 1, y) -
          lum(x - 1, y + 1) +
          lum(x + 1, y - 1) +
          2 * lum(x + 1, y) +
          lum(x + 1, y + 1);
        const gy =
          -lum(x - 1, y - 1) -
          2 * lum(x, y - 1) -
          lum(x + 1, y - 1) +
          lum(x - 1, y + 1) +
          2 * lum(x, y + 1) +
          lum(x + 1, y + 1);
        const magnitude = Math.sqrt(gx * gx + gy * gy);
        edges[y * width + x] = magnitude > 50 ? 255 : 0;
      }
    }

    return sharp(edges, { raw: { width, height, channels: 1 } });
  }

  /**
   * Searches the image for a second region that resembles `seedBox`.
   *
   * The seed region is cut out as a template (and also written to the
   * `images/debug` directory two levels above this module), both image and
   * template are converted to edge maps, and the template is matched across
   * the full image width within a vertical band around the seed.
   *
   * @param imagePath Path of the captcha image.
   * @param seedBox   Box of the slider that was already found.
   * @returns A box with the seed's size at the best match, with `score` set to
   *          the match value; `null` when the image has no dimensions, the
   *          best match is more than 25px above/below the seed, the score is
   *          not above 0.35, or any step throws (the error is logged).
   */
  public async detectSecondSlider(imagePath: string, seedBox: BoundingBox): Promise<BoundingBox | null> {
    try {
      const image = sharp(imagePath);
      const { width: imageWidth, height: imageHeight } = await image.metadata();

      if (!imageWidth || !imageHeight) return null;

      const template = image.clone().extract({
        left: seedBox.x,
        top: seedBox.y,
        width: seedBox.width,
        height: seedBox.height,
      });

      const debugDir = path.join(__dirname, '..', '..', 'images', 'debug');
      if (!fs.existsSync(debugDir)) fs.mkdirSync(debugDir, { recursive: true });
      const templateFileName = `template-${path.basename(imagePath)}`;
      await template.toFile(path.join(debugDir, templateFileName));
      console.log(` [SelfLearning] Saved refined template to: ${templateFileName}`);

      const imageEdge = await this.cannyEdge(image);
      const templateEdge = await this.cannyEdge(template);

      const searchArea = {
        x: 0,
        y: Math.max(0, seedBox.y - 25), // significantly widened vertical search range
        width: imageWidth,
        height: seedBox.height + 50, // significantly widened vertical search range
      };

      const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge, searchArea, seedBox);
      console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)} at y=${maxLoc.y}`);

      // The second slider must sit on the same horizontal line as the seed;
      // the allowed vertical deviation is relaxed to 25px.
      if (Math.abs(maxLoc.y - seedBox.y) > 25) {
        console.log(` [SelfLearning] Discarded second slider candidate because it's not on the same horizontal line (y-delta: ${Math.abs(maxLoc.y - seedBox.y)}px).`);
        return null;
      }

      if (maxVal > 0.35) { // a relatively lenient but still reasonable threshold
        return {
          x: maxLoc.x,
          y: maxLoc.y,
          width: seedBox.width,
          height: seedBox.height,
          score: maxVal,
        };
      }
      return null;
    } catch (error) {
      console.error(`Error during self-learning detection for ${imagePath}:`, error);
      return null;
    }
  }
}
|
||||
280
src/slider/detector.ts
Normal file
@@ -0,0 +1,280 @@
|
||||
import sharp from 'sharp';
|
||||
import { BoundingBox, RawImage } from './types';
|
||||
import { findCandidateBoxes } from './detection/candidate-search';
|
||||
import { calculateIoU } from './utils/geometry';
|
||||
import { SelfLearningSliderDetector } from './detector-self-learning';
|
||||
|
||||
/** Stroke colours available when drawing detection boxes. */
type BoxColor = 'red' | 'blue' | 'green';

/** Stroke colour used when the caller does not pick one. */
const DEFAULT_DRAW_COLOR: BoxColor = 'blue';
|
||||
|
||||
/**
 * Detects slider shapes in a captcha image: a candidate search supplies the
 * first box, that box is tightened against an edge map, and an optional
 * template-matching pass looks for a second slider.
 */
export class SliderDetector {
  private readonly selfLearning: SelfLearningSliderDetector;

  /**
   * @param selfLearning Detector used for the second-slider pass; a default
   *                     instance is created when omitted.
   */
  constructor(selfLearning?: SelfLearningSliderDetector) {
    this.selfLearning = selfLearning ?? new SelfLearningSliderDetector();
  }

  /**
   * Runs detection on one image.
   *
   * @param imagePath      Path of the image to analyse.
   * @param outputPath     When given, an annotated copy of the image is written here.
   * @param detectMultiple When true (default), also look for a second slider
   *                       and keep it if its IoU with the first box is below 0.5.
   * @returns The detected boxes (refined first box, then the optional second
   *          one), or `null` when no candidate was found or an error occurred
   *          (the error is logged, not rethrown).
   */
  async detectSlider(
    imagePath: string,
    outputPath?: string,
    detectMultiple: boolean = true
  ): Promise<BoundingBox[] | null> {
    try {
      const baseImage = sharp(imagePath);

      const [originalRaw, normalizedRaw] = await Promise.all([
        baseImage.clone().raw().toBuffer({ resolveWithObject: true }),
        baseImage.clone().normalize().raw().toBuffer({ resolveWithObject: true }),
      ]);

      const candidates = await findCandidateBoxes({
        original: toRawImage(originalRaw.data, originalRaw.info),
        normalized: toRawImage(normalizedRaw.data, normalizedRaw.info),
        quantizationSource: baseImage.clone(),
      });

      if (candidates.length === 0) {
        return null;
      }

      // The first candidate returned by the search is used as the seed.
      const edgeImage = await this.cannyEdge(baseImage.clone());
      const refinedSeed = await this.refineBox(candidates[0], edgeImage);

      const detections: BoundingBox[] = [refinedSeed];

      if (detectMultiple) {
        const second = await this.selfLearning.detectSecondSlider(imagePath, refinedSeed);
        if (second && calculateIoU(refinedSeed, second) < 0.5) {
          detections.push(second);
        }
      }

      if (outputPath) {
        await this.drawBoundingBoxes(imagePath, detections, outputPath, DEFAULT_DRAW_COLOR);
      }

      return detections;
    } catch (error) {
      console.error(`Error detecting slider in ${imagePath}:`, error);
      return null;
    }
  }

  /**
   * Writes a copy of `imagePath` to `outputPath` with `boxes` outlined.
   * Nothing is written when `boxes` is empty.
   */
  async annotate(
    imagePath: string,
    boxes: BoundingBox[],
    outputPath: string,
    color: BoxColor = DEFAULT_DRAW_COLOR
  ): Promise<void> {
    await this.drawBoundingBoxes(imagePath, boxes, outputPath, color);
  }

  /**
   * Builds a binary edge image (0 or 255, single channel) by thresholding the
   * Sobel gradient magnitude of the greyscale input at 50. Despite the name,
   * no Canny non-maximum suppression or hysteresis is performed. The one-pixel
   * border is left at 0. The input instance is cloned, not modified.
   *
   * @throws Error when the decoded image reports no width or height.
   */
  private async cannyEdge(image: sharp.Sharp): Promise<sharp.Sharp> {
    const { data, info } = await image
      .clone()
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    const { width, height, channels } = info;
    if (!width || !height) {
      throw new Error('Cannot compute edges without image dimensions');
    }

    const edges = Buffer.alloc(width * height);

    // Step by the reported channel count so that an extra channel in the raw
    // output (e.g. alpha) does not shift every pixel lookup.
    const lum = (col: number, row: number): number => data[(row * width + col) * channels];

    for (let y = 1; y < height - 1; y++) {
      for (let x = 1; x < width - 1; x++) {
        const gx =
          -lum(x - 1, y - 1) -
          2 * lum(x - 1, y) -
          lum(x - 1, y + 1) +
          lum(x + 1, y - 1) +
          2 * lum(x + 1, y) +
          lum(x + 1, y + 1);
        const gy =
          -lum(x - 1, y - 1) -
          2 * lum(x, y - 1) -
          lum(x + 1, y - 1) +
          lum(x - 1, y + 1) +
          2 * lum(x, y + 1) +
          lum(x + 1, y + 1);
        const magnitude = Math.sqrt(gx * gx + gy * gy);
        edges[y * width + x] = magnitude > 50 ? 255 : 0;
      }
    }

    return sharp(edges, {
      raw: { width, height, channels: 1 },
    });
  }

  /**
   * Finds the first and last index whose count reaches `minCount`.
   * Falls back to the whole range when no index qualifies.
   */
  private static projectionBounds(
    counts: number[],
    minCount = 2
  ): { start: number; end: number } {
    const start = counts.findIndex((count) => count >= minCount);
    if (start === -1) {
      return { start: 0, end: counts.length - 1 };
    }

    let end = start;
    for (let i = counts.length - 1; i > start; i--) {
      if (counts[i] >= minCount) {
        end = i;
        break;
      }
    }
    return { start, end };
  }

  /**
   * Shrinks `box` to the extent of the edge pixels found inside it.
   *
   * Edge pixels are counted per column and per row of the extracted region;
   * the refined box spans the first to last column/row holding at least two
   * edge pixels. The original box is returned unchanged when the result would
   * be 10px or smaller in either dimension, or when extraction fails (the
   * error is logged).
   */
  private async refineBox(
    box: BoundingBox,
    edgeImage: sharp.Sharp
  ): Promise<BoundingBox> {
    try {
      const { data, info } = await edgeImage
        .clone()
        .extract({
          left: box.x,
          top: box.y,
          width: box.width,
          height: box.height,
        })
        .raw()
        .toBuffer({ resolveWithObject: true });

      const { width, height, channels } = info;

      const columnHits = new Array<number>(width).fill(0);
      const rowHits = new Array<number>(height).fill(0);

      for (let y = 0; y < height; y++) {
        for (let x = 0; x < width; x++) {
          if (data[(y * width + x) * channels] > 0) {
            columnHits[x]++;
            rowHits[y]++;
          }
        }
      }

      const { start: xStart, end: xEnd } = SliderDetector.projectionBounds(columnHits);
      const { start: yStart, end: yEnd } = SliderDetector.projectionBounds(rowHits);

      const newWidth = xEnd - xStart + 1;
      const newHeight = yEnd - yStart + 1;

      // The refined span is a sub-range of the extracted region, so it can
      // never exceed the original size; only the "too small" case needs a guard.
      if (newWidth <= 10 || newHeight <= 10) {
        return box;
      }

      return {
        x: box.x + xStart,
        y: box.y + yStart,
        width: newWidth,
        height: newHeight,
        score: box.score,
      };
    } catch (error) {
      console.error('[RefineBox] Failed to refine candidate, returning original box.', error);
      return box;
    }
  }

  /**
   * Composites an SVG overlay with one 2px outlined rectangle per box onto the
   * image and writes the result to `outputPath`. Does nothing for an empty
   * `boxes` array.
   *
   * @throws Error when the image metadata has no width or height.
   */
  private async drawBoundingBoxes(
    imagePath: string,
    boxes: BoundingBox[],
    outputPath: string,
    color: BoxColor = DEFAULT_DRAW_COLOR
  ): Promise<void> {
    if (boxes.length === 0) {
      return;
    }

    const colorMap: Record<BoxColor, { r: number; g: number; b: number }> = {
      red: { r: 255, g: 0, b: 0 },
      blue: { r: 0, g: 0, b: 255 },
      green: { r: 0, g: 255, b: 0 },
    };

    const rgb = colorMap[color];

    const image = sharp(imagePath);
    const metadata = await image.metadata();

    if (!metadata.width || !metadata.height) {
      throw new Error('Cannot draw bounding boxes without image dimensions');
    }

    const rectangles = boxes
      .map(
        (box) => `
<rect
x="${box.x}"
y="${box.y}"
width="${box.width}"
height="${box.height}"
fill="none"
stroke="rgb(${rgb.r},${rgb.g},${rgb.b})"
stroke-width="2"
/>`
      )
      .join('\n');

    const svg = Buffer.from(
      `<svg width="${metadata.width}" height="${metadata.height}" xmlns="http://www.w3.org/2000/svg">
${rectangles}
</svg>`
    );

    await image
      .composite([{ input: svg, top: 0, left: 0 }])
      .toFile(outputPath);
  }
}
|
||||
|
||||
/**
 * Bundles a raw pixel buffer with the dimensions reported for it.
 *
 * @param data Raw pixel bytes.
 * @param info Output info describing `data`.
 * @returns The buffer together with its width, height and channel count.
 * @throws Error when width, height or channels is missing or zero.
 */
function toRawImage(data: Buffer, info: sharp.OutputInfo): RawImage {
  const isComplete = Boolean(info.width && info.height && info.channels);
  if (!isComplete) {
    throw new Error('Failed to read image metadata.');
  }

  return {
    data,
    width: info.width,
    height: info.height,
    channels: info.channels,
  };
}
|
||||
6
src/slider/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export { SliderController } from './slider-controller';
|
||||
export { SliderDetector } from './detector';
|
||||
export { SliderValidator } from './validator';
|
||||
export { SelfLearningSliderDetector } from './detector-self-learning';
|
||||
export type { BoundingBox, Rectangle, RawImage } from './types';
|
||||
export type { SliderSolveResult } from './slider-controller';
|
||||
1140
src/slider/slider-controller.ts
Normal file
17
src/slider/types.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
/** Axis-aligned rectangle described by its origin corner and size. */
export interface Rectangle {
  /** Horizontal position of the rectangle's origin (its left edge). */
  x: number;
  /** Vertical position of the rectangle's origin (its top edge). */
  y: number;
  /** Horizontal extent, measured from `x`. */
  width: number;
  /** Vertical extent, measured from `y`. */
  height: number;
}
|
||||
|
||||
/** A rectangle produced by a detector, together with that detector's score. */
export interface BoundingBox extends Rectangle {
  /**
   * Score assigned by whichever detector produced the box. The scale is
   * producer-specific, so scores from different detectors are not comparable.
   */
  score: number;
}
|
||||
|
||||
/** Decoded pixel data plus the layout needed to index into it. */
export interface RawImage {
  /** Pixel bytes. */
  data: Buffer;
  /** Image width in pixels. */
  width: number;
  /** Image height in pixels. */
  height: number;
  /** Number of samples stored per pixel. */
  channels: number;
}
|
||||
16
src/slider/utils/geometry.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { Rectangle } from '../types';
|
||||
|
||||
/**
 * Intersection over union of two axis-aligned rectangles.
 *
 * @returns Overlap area divided by the combined area, or 0 when the combined
 *          area is 0 (for example two zero-sized rectangles).
 */
export function calculateIoU(a: Rectangle, b: Rectangle): number {
  const overlapWidth = Math.max(
    0,
    Math.min(a.x + a.width, b.x + b.width) - Math.max(a.x, b.x)
  );
  const overlapHeight = Math.max(
    0,
    Math.min(a.y + a.height, b.y + b.height) - Math.max(a.y, b.y)
  );

  const intersection = overlapWidth * overlapHeight;
  const union = a.width * a.height + b.width * b.height - intersection;

  if (union === 0) {
    return 0;
  }
  return intersection / union;
}
|
||||
136
src/slider/utils/image.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
import { RawImage } from '../types';
|
||||
|
||||
/**
 * Convert interleaved pixel data to one luminance byte per pixel.
 *
 * With three or more channels the first three samples of each pixel are
 * treated as R, G, B and weighted 0.299 / 0.587 / 0.114; any further channel
 * is ignored. With fewer than three channels there is no colour to weight, so
 * the first sample of each pixel is copied as-is (previously such input
 * produced an all-zero result).
 *
 * @param data     Interleaved pixel bytes, `channels` samples per pixel.
 * @param width    Image width in pixels.
 * @param height   Image height in pixels.
 * @param channels Number of samples per pixel in `data`.
 * @returns A `width * height` array of luminance values.
 */
function toGrayscale(
  data: Buffer,
  width: number,
  height: number,
  channels: number
): Uint8Array {
  const pixelCount = width * height;
  const gray = new Uint8Array(pixelCount);

  if (channels < 3) {
    // Already single-valued per pixel (optionally followed by alpha).
    for (let i = 0; i < pixelCount; i++) {
      gray[i] = data[i * channels];
    }
    return gray;
  }

  for (let i = 0; i < pixelCount; i++) {
    const idx = i * channels;
    gray[i] = Math.round(
      data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114
    );
  }
  return gray;
}
|
||||
|
||||
/**
 * Build a binary edge mask from raw pixel data using the Sobel operator.
 *
 * The image is first reduced to grayscale; a pixel is marked 1 when its
 * gradient magnitude exceeds 40, otherwise 0. The outermost one-pixel border
 * is never evaluated and stays 0.
 *
 * @returns A `width * height` row-major mask of 0/1 values.
 */
export function createEdgeMap(image: RawImage): Uint8Array {
  const { width, height } = image;
  const gray = toGrayscale(image.data, width, height, image.channels);
  const edges = new Uint8Array(width * height);

  for (let row = 1; row < height - 1; row++) {
    const above = (row - 1) * width;
    const here = row * width;
    const below = (row + 1) * width;

    for (let col = 1; col < width - 1; col++) {
      // 3x3 neighbourhood, named by compass direction.
      const nw = gray[above + col - 1];
      const n = gray[above + col];
      const ne = gray[above + col + 1];
      const w = gray[here + col - 1];
      const e = gray[here + col + 1];
      const sw = gray[below + col - 1];
      const s = gray[below + col];
      const se = gray[below + col + 1];

      const gx = ne + 2 * e + se - (nw + 2 * w + sw);
      const gy = sw + 2 * s + se - (nw + 2 * n + ne);

      const magnitude = Math.sqrt(gx * gx + gy * gy);
      edges[here + col] = magnitude > 40 ? 1 : 0;
    }
  }

  return edges;
}
|
||||
|
||||
/**
 * Morphological closing: the mask is dilated, then the dilated mask is eroded,
 * both with the same kernel size.
 *
 * @param binary     Row-major mask to close.
 * @param width      Mask width in pixels.
 * @param height     Mask height in pixels.
 * @param kernelSize Side length of the square kernel.
 * @returns A new mask; the input is not modified.
 */
export function morphologyClose(
  binary: Uint8Array,
  width: number,
  height: number,
  kernelSize: number
): Uint8Array {
  return erode(
    dilate(binary, width, height, kernelSize),
    width,
    height,
    kernelSize
  );
}
|
||||
|
||||
/**
 * Morphological dilation: every output pixel is the maximum of the input
 * pixels inside a square window centred on it. Window positions that fall
 * outside the mask are ignored.
 *
 * @param binary     Row-major mask.
 * @param width      Mask width in pixels.
 * @param height     Mask height in pixels.
 * @param kernelSize Side length of the window; the reach on each side of the
 *                   centre is `floor(kernelSize / 2)`.
 * @returns A new mask; the input is not modified.
 */
export function dilate(
  binary: Uint8Array,
  width: number,
  height: number,
  kernelSize: number
): Uint8Array {
  const reach = Math.floor(kernelSize / 2);
  const output = new Uint8Array(width * height);

  for (let row = 0; row < height; row++) {
    // Clamp the window to the mask once per row/column instead of per sample.
    const firstRow = Math.max(0, row - reach);
    const lastRow = Math.min(height - 1, row + reach);

    for (let col = 0; col < width; col++) {
      const firstCol = Math.max(0, col - reach);
      const lastCol = Math.min(width - 1, col + reach);

      let brightest = 0;
      for (let ny = firstRow; ny <= lastRow; ny++) {
        const rowStart = ny * width;
        for (let nx = firstCol; nx <= lastCol; nx++) {
          brightest = Math.max(brightest, binary[rowStart + nx]);
        }
      }

      output[row * width + col] = brightest;
    }
  }

  return output;
}
|
||||
|
||||
/**
 * Morphological erosion: every output pixel is the minimum of the input
 * pixels inside a square window centred on it. Window positions that fall
 * outside the mask are ignored.
 *
 * The running minimum starts at 1, so output values never exceed 1 even when
 * the input holds larger values.
 *
 * @param binary     Row-major mask.
 * @param width      Mask width in pixels.
 * @param height     Mask height in pixels.
 * @param kernelSize Side length of the window; the reach on each side of the
 *                   centre is `floor(kernelSize / 2)`.
 * @returns A new mask; the input is not modified.
 */
export function erode(
  binary: Uint8Array,
  width: number,
  height: number,
  kernelSize: number
): Uint8Array {
  const reach = Math.floor(kernelSize / 2);
  const output = new Uint8Array(width * height);

  for (let row = 0; row < height; row++) {
    // Clamp the window to the mask once per row/column instead of per sample.
    const firstRow = Math.max(0, row - reach);
    const lastRow = Math.min(height - 1, row + reach);

    for (let col = 0; col < width; col++) {
      const firstCol = Math.max(0, col - reach);
      const lastCol = Math.min(width - 1, col + reach);

      let darkest = 1;
      for (let ny = firstRow; ny <= lastRow; ny++) {
        const rowStart = ny * width;
        for (let nx = firstCol; nx <= lastCol; nx++) {
          darkest = Math.min(darkest, binary[rowStart + nx]);
        }
      }

      output[row * width + col] = darkest;
    }
  }

  return output;
}
|
||||
|
||||
export { toGrayscale };
|
||||
105
src/slider/validator.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
import { BoundingBox, Rectangle } from './types';
|
||||
import { calculateIoU } from './utils/geometry';
|
||||
|
||||
/**
 * Compares detected boxes against expected target boxes and reports
 * precision/recall figures.
 */
class SliderValidator {
  /**
   * Checks whether two boxes match, allowing some deviation.
   *
   * A match requires the distance between the two centres to be at most
   * `tolerance` and the width and height to differ by at most 30 each. The
   * size allowance is deliberately generous because morphological operations
   * may change the detected size.
   */
  isBoxMatching(detected: Rectangle, target: Rectangle, tolerance: number = 10): boolean {
    // Offset between the two centre points.
    const deltaX = detected.x + detected.width / 2 - (target.x + target.width / 2);
    const deltaY = detected.y + detected.height / 2 - (target.y + target.height / 2);
    const centerDistance = Math.sqrt(Math.pow(deltaX, 2) + Math.pow(deltaY, 2));
    if (centerDistance > tolerance) {
      return false;
    }

    // Size differences.
    if (Math.abs(detected.width - target.width) > 30) {
      return false;
    }
    return Math.abs(detected.height - target.height) <= 30;
  }

  /**
   * Intersection over union of two boxes.
   */
  calculateIoU(box1: Rectangle, box2: Rectangle): number {
    return calculateIoU(box1, box2);
  }

  /**
   * Validates a detection result against the expected targets.
   *
   * Every detected/target pair that passes `isBoxMatching` and has an IoU
   * above 0.1 is a potential match; pairs are then accepted greedily from the
   * highest IoU down, each detected box and each target being used at most once.
   *
   * @returns Counts, precision (matches / detected), recall (matches /
   *          targets), the accepted pairs and the detected boxes left
   *          unmatched. Precision and recall are 0 when their denominator is 0.
   */
  async validateDetection(
    detectedBoxes: Rectangle[],
    targetBoxes: Rectangle[],
    tolerance: number = 10
  ): Promise<{
    totalTargets: number;
    detectedCount: number;
    matchedCount: number;
    precision: number;
    recall: number;
    matches: Array<{ detected: Rectangle; target: Rectangle; iou: number }>;
    unmatched: Rectangle[];
  }> {
    // 1. Collect every plausible detected/target pairing.
    const pairings: Array<{ detIdx: number; tarIdx: number; iou: number }> = [];
    detectedBoxes.forEach((detected, detIdx) => {
      targetBoxes.forEach((target, tarIdx) => {
        if (!this.isBoxMatching(detected, target, tolerance)) {
          return;
        }
        const iou = this.calculateIoU(detected, target);
        // Enforce a lower bound on IoU.
        if (iou > 0.1) {
          pairings.push({ detIdx, tarIdx, iou });
        }
      });
    });

    // 2. Highest IoU first.
    pairings.sort((first, second) => second.iou - first.iou);

    // 3. Greedily accept the best pairing still available.
    const usedDetected = new Set<number>();
    const usedTargets = new Set<number>();
    const matches: Array<{ detected: Rectangle; target: Rectangle; iou: number }> = [];
    for (const { detIdx, tarIdx, iou } of pairings) {
      if (usedDetected.has(detIdx) || usedTargets.has(tarIdx)) {
        continue;
      }
      usedDetected.add(detIdx);
      usedTargets.add(tarIdx);
      matches.push({
        detected: detectedBoxes[detIdx],
        target: targetBoxes[tarIdx],
        iou,
      });
    }

    // Detected boxes that were not paired with any target.
    const unmatched = detectedBoxes.filter((_, index) => !usedDetected.has(index));

    const detectedCount = detectedBoxes.length;
    const totalTargets = targetBoxes.length;
    const matchedCount = matches.length;

    return {
      totalTargets,
      detectedCount,
      matchedCount,
      precision: detectedCount > 0 ? matchedCount / detectedCount : 0,
      recall: totalTargets > 0 ? matchedCount / totalTargets : 0,
      matches,
      unmatched,
    };
  }
}
|
||||
|
||||
export { SliderValidator, BoundingBox, Rectangle };
|
||||