diff --git a/.gitignore b/.gitignore index 0922eeb..0a34e2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ node_modules/ dist/ coverage/ +output/ +noflag/ *.log npm-debug.log* yarn-debug.log* @@ -18,4 +20,4 @@ Thumbs.db # Test *.test.js -*.spec.js \ No newline at end of file +*.spec.js diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 46db0fd..d5c4693 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,6 +1,6 @@ -# 架构说明 +# 架构说明(v1.1.0) -本文档梳理项目中的主要模块、职责划分以及核心流程,帮助维护者快速了解整体结构。当前版本仅关注短信验证码登录与 Cookie 持久化,滑块验证码需人工操作。 +本文档梳理项目中的主要模块、职责划分以及核心流程,帮助维护者快速了解整体结构。当前版本包含短信验证码登录、Cookie 持久化以及 AI 驱动的滑块验证码自动破解功能。 ## 模块概览 @@ -8,64 +8,206 @@ ├── README.md // 使用说明与运行指引 ├── ARCHITECTURE.md // 架构概览与流程说明(本文档) ├── IMPLEMENTATION.md // 关键实现细节记录 +├── QUICKSTART.md // 快速开始指南 +├── CHANGELOG.md // 版本更新日志 +├── release.md // 发布说明 ├── login.md // 早期需求与操作步骤 +├── package.json // 项目配置(v1.1.0) ├── src/ -│ └── login.ts // 豆瓣登录脚本入口(Cookie 复用 + 短信登录) +│ ├── login.ts // 豆瓣登录脚本入口(集成滑块验证) +│ └── slider/ // 滑块验证模块(v1.1.0 新增) +│ ├── index.ts // 模块导出 +│ ├── types.ts // 类型定义 +│ ├── detector.ts // 主滑块检测器 +│ ├── detector-self-learning.ts // 第二滑块检测 +│ ├── slider-controller.ts // 滑块控制器 +│ ├── cli.ts // CLI 批量工具 +│ ├── validator.ts // 结果验证工具 +│ ├── detection/ +│ │ └── candidate-search.ts // 多策略检测 +│ └── utils/ +│ ├── geometry.ts // 几何计算 +│ └── image.ts // 图像处理 +├── noflag/ // 原始验证码截图输出目录 +├── output/ // 标注结果输出目录 └── typescript-spec.md // 团队 TypeScript 编码规范 ``` -## 登录流程分层 +## 登录流程分层(v1.1.0) ``` -┌────────────────────────────────────┐ -│ main() │ -│ - 启动 Chromium │ -│ - 复用或创建上下文 │ -│ - 调用 loginWithSms() │ -│ - 保存 Cookies │ -└────────────────────────────────────┘ - │ -┌────────────────▼──────────────────┐ -│ loginWithSms() │ -│ - 输入手机号 │ -│ - 触发短信验证码 │ -│ - 提示用户完成页面额外验证 │ -│ - 等待并提交短信验证码 │ -│ - 校验是否登录成功 │ -└────────────────────────────────────┘ - │ -┌────────────────▼──────────────────┐ -│ isLoggedIn() │ -│ - 检查关键 Cookie(dbcl2) │ -│ - 确认登录表单是否仍然可见 │ -└────────────────────────────────────┘ +┌─────────────────────────────────────────┐ +│ main() │ +│ - 启动 Chromium │ +│ - 复用或创建上下文 │ +│ - 调用 loginWithSms() │ +│ - 保存 Cookies │ +└─────────────────────────────────────────┘ + │ +┌──────────────────▼────────────────────┐ +│ loginWithSms() │ +│ - 输入手机号 │ +│ - 触发短信验证码 │ +│ - [v1.1.0] 自动处理滑块验证 │ +│ - 等待并提交短信验证码 │ +│ - 校验是否登录成功 │ +└───────────────────────────────────────┘ + │ + ┌────────────┴──────────────┐ + │ │ +┌─────▼──────────────┐ ┌─────────▼──────────────┐ +│ SliderController │ │ isLoggedIn() │ +│ - 等待滑块出现 │ │ - 检查 Cookie(dbcl2) │ +│ - 截图到 noflag/ │ │ - 确认登录表单状态 │ +│ - 调用 detector │ └────────────────────────┘ +│ - 计算距离 │ +│ - 拖动滑块 │ +│ - 验证成功标识 │ +│ - 失败重试(10次) │ +└────────────────────┘ + │ +┌────────▼───────────────┐ +│ SliderDetector │ +│ - 图像缩放(800px) │ +│ - 多策略检测 │ +│ - 候选框评分 │ +│ - 绘制标注到 output/ │ +└────────────────────────┘ + │ +┌────────▼───────────────┐ +│ CandidateSearch │ +│ - 暗区域检测 │ +│ - Canny 边缘检测 │ +│ - 颜色量化 │ +│ - LAB 色彩空间 │ +│ - IoU 去重 │ +└────────────────────────┘ ``` -- `prepareContext()`:负责加载已有 Cookie、创建新上下文以及兜底跳转登录页。 -- `loginWithSms()`:串联短信登录流程,涵盖用户输入与结果确认。 -- `isLoggedIn()`:封装判定逻辑,避免各处重复编写 Cookie/页面检查。 +**关键模块职责**: + +- `prepareContext()`:负责加载已有 Cookie、创建新上下文以及兜底跳转登录页 +- `loginWithSms()`:串联短信登录流程,涵盖用户输入与滑块自动化 +- `SliderController`:Playwright 集成,控制滑块验证的完整流程 +- `SliderDetector`:图像处理和滑块位置检测的核心算法 +- `CandidateSearch`:多种图像识别策略的并行执行 +- `isLoggedIn()`:封装判定逻辑,避免各处重复编写 Cookie/页面检查 ## 依赖与交互 -- **Playwright**:启动浏览器、操作页面元素、持久化 `storageState`。 -- **Node.js**:文件读写、路径与环境变量处理。 -- **readline**:在控制台等待用户输入短信验证码。 -- **环境变量**:当前仅使用 `DOUBAN_PHONE` 指定登录手机号。 -- **`~/douban-cookie.json`**:保存登录态的 storageState 文件,下次运行直接复用。 +- **Playwright**:启动浏览器、操作页面元素、持久化 `storageState`、控制滑块拖动 +- **Sharp**:图像处理(缩放、边缘检测、颜色量化、模板匹配) +- **Node.js**:文件读写、路径与环境变量处理 +- **readline**:在控制台等待用户输入短信验证码 +- **环境变量**: + - `DOUBAN_PHONE`:登录手机号(必填) + - `DOUBAN_AUTO_SLIDER`:启用自动滑块验证(可选,值为 1 时启用) +- **`~/douban-cookie.json`**:保存登录态的 storageState 文件,下次运行直接复用 +- **`noflag/`**:原始验证码截图存储目录 +- **`output/`**:标注结果(红框)存储目录 -## 数据流 +## 数据流(v1.1.0) -1. 读取 `DOUBAN_PHONE`,未配置则终止; -2. 若存在本地 Cookie 文件,加载后访问登录页以确认是否仍然有效; -3. 无有效登录态时执行短信登录: - - Playwright 填写手机号并请求验证码; - - 用户在浏览器中手动完成滑块等验证; - - 控制台输入短信验证码并提交; -4. 登录成功后调用 `context.storageState()` 写入 `~/douban-cookie.json`; -5. 浏览器关闭,后续脚本可直接复用该文件。 +1. **初始化阶段** + - 读取 `DOUBAN_PHONE`,未配置则终止 + - 检查 `DOUBAN_AUTO_SLIDER` 环境变量 + - 若存在本地 Cookie 文件,加载后访问登录页以确认是否仍然有效 + +2. **登录流程** + - 无有效登录态时执行短信登录: + - Playwright 填写手机号并请求验证码 + - **[v1.1.0]** 自动检测并处理滑块验证码: + 1. 等待验证码 iframe 加载 + 2. 截图验证码区域到 `noflag/` 目录 + 3. 使用 Sharp 将图像缩放到 800px 宽度 + 4. 并行运行四种检测策略 + 5. 计算距离:`(缺口X - 滑块X) / scaleX` + 6. 绘制红框标注保存到 `output/` 目录 + 7. 拖动滑块到计算位置 + 8. 检测成功标识(`.tc-success`) + 9. 失败则刷新重试(最多 10 次) + - 控制台输入短信验证码并提交 + +3. **状态持久化** + - 登录成功后调用 `context.storageState()` 写入 `~/douban-cookie.json` + - 浏览器关闭,后续脚本可直接复用该文件 + +4. **图像数据流** + ``` + 原始验证码(340x191) + │ + ▼ 截图 + noflag/captcha-timestamp.png + │ + ▼ 缩放到 800px + 内存中的处理图像(800x449) + │ + ▼ 多策略检测 + 候选框数组 [{x,y,w,h,score}] + │ + ▼ 评分排序 + IoU去重 + 最佳滑块位置 [b1, b2] + │ + ▼ 绘制红框 + output/captcha-timestamp-detected.png + │ + ▼ 计算距离 + 移动距离 = (b2.x - b1.x) / scaleX + ``` ## 日志与错误处理 -- 关键步骤均在控制台打印提示,便于追踪流程; -- 验证码相关操作采用提示 + `prompt` 方式等待人工输入; -- 登录失败或异常会设置 `process.exitCode` 并输出详细错误信息。 +- 关键步骤均在控制台打印提示,便于追踪流程 +- **[v1.1.0]** 滑块检测过程的详细日志: + - 图像缩放信息(原始尺寸 → 检测尺寸) + - 检测到的滑块数量和位置 + - 每个滑块的评分和尺寸 + - 距离计算公式和结果 + - 成功/失败状态和重试次数 +- 验证码相关操作采用提示 + `prompt` 方式等待人工输入 +- 登录失败或异常会设置 `process.exitCode` 并输出详细错误信息 +- 视觉调试:`output/` 目录中的红框标注图便于人工验证检测准确性 + +## v1.1.0 核心创新 + +### 简化的距离计算算法 + +**核心原理**:"两只小鸟嘴尖距离" + +```typescript +// 双滑块模式(推荐) +const distance = (box2.x - box1.x) / scaleX; + +// 单滑块模式(兜底) +const distance = box.x / scaleX; +``` + +**为什么这样简单?** +1. 检测在 800px 宽度图像上进行(scaleX ≈ 2.35) +2. 两个滑块的左边界水平距离就是移动距离(缩放坐标系) +3. 除以 scaleX 转换回实际显示坐标系(340px) +4. 避免复杂的 iframe 偏移、页面坐标转换等计算 + +**v1.0.0 vs v1.1.0**: +- v1.0.0:需要人工完成滑块验证 +- v1.1.0:自动检测、计算、拖动,成功率约 50% + +### 多策略并行检测 + +并行运行四种算法,提高鲁棒性: + +1. **暗区域检测**:基于亮度阈值查找暗色滑块 +2. **Canny 边缘检测**:查找边缘密集区域 +3. **颜色量化**:K-means 聚类找独特色块 +4. **LAB 色彩空间**:在感知均匀的色彩空间中检测 + +候选框通过 IoU 去重,避免重复检测同一个滑块。 + +### 自学习模板匹配 + +使用第一个检测到的滑块作为模板,在图像中查找第二个滑块: + +1. 提取第一个滑块的边缘特征 +2. 在剩余区域进行模板匹配 +3. 验证 y 坐标一致性(偏差 < 25px) +4. 确保两个滑块在合理的水平距离范围内 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d80e805 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,51 @@ +# 更新日志 + +## [1.1.0] - 2025-10-25 + +### ✨ 新功能 + +- **自动滑块验证**: 集成 AI 驱动的滑块验证码识别和求解功能 +- **多策略检测**: 实现暗区检测、边缘检测、颜色量化、LAB 色彩空间分析四种并行策略 +- **双滑块识别**: 支持同时检测左侧滑块和右侧缺口,实现精确距离计算 +- **可视化调试**: 自动生成带红框标记的检测结果图片,保存在 `output/` 目录 +- **自动重试机制**: 验证失败时自动刷新并重试,最多 10 次 +- **图像缩放优化**: 自动将验证码图片放大到 800px 宽度以提高识别精度 + +### 🔧 优化改进 + +- **简化距离计算**: 采用更简洁准确的算法 + - 双滑块模式:`距离 = (缺口X - 滑块X) / scaleX` + - 移除了不必要的复杂坐标转换逻辑 +- **拟人化滑动**: 使用 Playwright 的 `steps` 参数实现更平滑的鼠标移动轨迹 +- **增强成功判断**: 检测腾讯验证码特有的成功标识(`.tc-success`) +- **优化元素等待**: 增加 iframe 内元素加载的检测和重试机制 +- **详细日志输出**: 添加完整的调试信息,便于问题追溯 + +### 🐛 Bug 修复 + +- 修复坐标系不统一导致的距离计算错误 +- 修复 iframe 内元素无法正确访问的问题 +- 修复候选框因边距过滤被误删的问题 +- 修复截图时包含滑块本身导致识别干扰的问题 + +### 📝 文档更新 + +- 更新 `README.md`,添加详细的功能说明和使用指南 +- 创建 `src/slider/README.md`,详细说明滑块识别算法和实现细节 +- 添加调试技巧和常见问题排查指南 + +### 🏗️ 架构变更 + +- 从 `captcha_cracker` 项目移植核心识别算法 +- 新增 `src/slider/` 模块,包含完整的滑块验证功能 +- 集成 Sharp 库用于图像处理 +- 支持通过 `DOUBAN_AUTO_SLIDER=1` 环境变量启用自动滑块验证 + +## [1.0.0] - 2025-10-24 + +### 初始版本 + +- 基于 Playwright 的豆瓣登录自动化 +- 短信验证码登录支持 +- Cookie 持久化和复用 +- 手动滑块验证提示 diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md index 463b15d..83891a4 100644 --- a/IMPLEMENTATION.md +++ b/IMPLEMENTATION.md @@ -1,40 +1,62 @@ -# 登录脚本实现笔记 +# 登录脚本实现笔记(v1.1.0) -本文记录当前版本豆瓣登录脚本的实现细节、关键函数以及后续可扩展点。滑块验证码相关逻辑已移除,若页面出现额外验证需人工完成。 +本文记录当前版本豆瓣登录脚本的实现细节、关键函数以及后续可扩展点。v1.1.0 版本集成了完整的滑块验证码自动破解功能,大幅提升自动化程度。 ## 文件结构 ``` src/ -└── login.ts # Playwright 入口脚本 +├── login.ts # Playwright 入口脚本 +└── slider/ # v1.1.0 新增滑块验证模块 + ├── index.ts + ├── types.ts + ├── detector.ts + ├── detector-self-learning.ts + ├── slider-controller.ts + ├── cli.ts + ├── validator.ts + ├── detection/ + │ └── candidate-search.ts + └── utils/ + ├── geometry.ts + └── image.ts ``` 辅助文档位于项目根目录: -- `README.md`:使用说明与常见问题; -- `ARCHITECTURE.md`:整体架构与流程拆解; -- `login.md`:早期需求说明,可作为手动操作参考。 +- `README.md`:使用说明与常见问题 +- `ARCHITECTURE.md`:整体架构与流程拆解 +- `QUICKSTART.md`:快速开始指南 +- `CHANGELOG.md`:版本更新日志 +- `login.md`:早期需求说明,可作为手动操作参考 -## 核心流程 +## 核心流程(v1.1.0) 1. **读取配置** - 通过 `process.env.DOUBAN_PHONE` 获取手机号,缺失时直接退出。 + - 通过 `process.env.DOUBAN_PHONE` 获取手机号,缺失时直接退出 + - 检查 `process.env.DOUBAN_AUTO_SLIDER` 是否启用自动滑块验证 2. **准备浏览器上下文** (`prepareContext`) - - 若存在 `~/douban-cookie.json`,以 `storageState` 形式加载; - - 打开登录页并调用 `isLoggedIn` 校验是否仍在登录态; - - 失效时关闭旧上下文并创建全新 session。 + - 若存在 `~/douban-cookie.json`,以 `storageState` 形式加载 + - 打开登录页并调用 `isLoggedIn` 校验是否仍在登录态 + - 失效时关闭旧上下文并创建全新 session 3. **执行短信登录** (`loginWithSms`) - - 输入手机号 → 点击「获取验证码」; - - 控制台提醒用户在浏览器中手动完成滑块等页面验证; - - 通过 `prompt` 等待用户输入短信验证码并提交; - - 等待 Playwright 检测到页面离开登录地址或抛出超时。 + - 输入手机号 → 点击「获取验证码」 + - **[v1.1.0]** 自动检测并处理滑块验证码: + - 调用 `SliderController.solveSlider()` + - 等待验证码 iframe 出现 + - 截图并保存到 `noflag/` 目录 + - 调用检测算法识别滑块位置 + - 计算滑动距离并执行拖动 + - 验证成功后继续,失败则重试(最多 10 次) + - 通过 `prompt` 等待用户输入短信验证码并提交 + - 等待 Playwright 检测到页面离开登录地址或抛出超时 4. **确认状态并写入 Cookie 文件** - - `isLoggedIn` 再次判断是否登录成功; - - 调用 `context.storageState({ path })` 将状态写入 `~/douban-cookie.json`; - - 终端提示成功信息,方便用户确认文件路径。 + - `isLoggedIn` 再次判断是否登录成功 + - 调用 `context.storageState({ path })` 将状态写入 `~/douban-cookie.json` + - 终端提示成功信息,方便用户确认文件路径 ## 关键函数 @@ -50,31 +72,309 @@ src/ 串联短信验证码登录的主要逻辑,所有用户交互点都通过控制台提示: -- 页面操作由脚本自动完成(填手机号、点击按钮); -- 人机验证与短信输入由用户处理; -- 函数内部对提交过程设置合理的等待时间,避免过早关闭浏览器。 +- 页面操作由脚本自动完成(填手机号、点击按钮) +- **[v1.1.0]** 滑块验证自动处理(启用 `DOUBAN_AUTO_SLIDER=1` 时) +- 短信验证码输入由用户处理 +- 函数内部对提交过程设置合理的等待时间,避免过早关闭浏览器 ### `main()` 作为 CLI 入口,负责整体 orchestrate:校验配置 → 启动浏览器 → 调用上述函数 → 捕获异常并设置 `process.exitCode`。 +## v1.1.0 新增核心函数 + +### `SliderController.solveSlider(page, sliderSelector, captchaSelector)` + +滑块验证的主控制器,负责完整的验证流程: + +```typescript +async solveSlider( + page: Page, + sliderSelector: string = '.tcaptcha_drag_button', + captchaSelector: string = '#tcaptcha_iframe' +): Promise +``` + +**工作流程**: +1. 等待验证码 iframe 加载(`waitForSelector`) +2. 等待滑块背景图完全加载 +3. 进入重试循环(最多 10 次): + - 调用 `captureSliderImage()` 截图到 `noflag/` + - 调用 `SliderDetector.detectSlider()` 检测滑块 + - 调用 `calculateDistance()` 计算移动距离 + - 调用 `dragSlider()` 拖动滑块 + - 调用 `checkSuccess()` 检测是否成功 + - 成功则返回,失败则刷新验证码重试 + +**返回值**: +```typescript +interface SliderSolveResult { + success: boolean; // 是否成功 + attempts: number; // 尝试次数 + distance?: number; // 滑动距离(像素) +} +``` + +### `SliderDetector.detectSlider(imagePath, outputPath, drawBoxes)` + +滑块检测的核心算法实现: + +```typescript +async detectSlider( + imagePath: string, + outputPath: string, + drawBoxes: boolean = true +): Promise +``` + +**工作流程**: +1. 使用 Sharp 加载图像 +2. 缩放到 800px 宽度(保持宽高比) +3. 调用 `CandidateSearch.findCandidates()` 获取候选框 +4. 对每个候选框计算综合评分 +5. 按评分排序,选择前 2 个 +6. 如果只有 1 个,尝试使用模板匹配找第二个 +7. 绘制红框标注并保存到 `outputPath` +8. 返回检测到的滑块位置数组 + +**评分标准**: +- 形状评分:宽高比、面积合理性 +- 色调一致性:内部颜色是否统一 +- 边缘密度:边缘特征是否明显 +- 梯度平滑度:是否有明确的边界 + +### `CandidateSearch.findCandidates(rawImage)` + +多策略并行检测候选区域: + +```typescript +async findCandidates(rawImage: RawImage): Promise +``` + +**四种策略**: + +1. **暗区域检测** (`findDarkRegions`) + - 基于亮度阈值(< 100) + - 连通组件分析 + - 形状过滤(宽高比、面积) + +2. **Canny 边缘检测** (`findEdgeDensityRegions`) + - Canny 算法提取边缘 + - 滑动窗口统计边缘密度 + - 局部最大值抑制 + +3. **颜色量化** (`findColorQuantizationRegions`) + - K-means 聚类(k=5) + - 提取少数色块区域 + - 形状验证 + +4. **LAB 色彩空间** (`findLabColorRegions`) + - 转换到 LAB 空间 + - 基于 a*、b* 通道的色度检测 + - 连通组件分析 + +**去重策略**: +- 计算所有候选框的 IoU(交并比) +- IoU > 0.3 认为是同一个滑块 +- 保留评分最高的 + +### `calculateDistance(boxes, scaleX)` + +**v1.1.0 简化算法**的核心实现: + +```typescript +private calculateDistance( + boxes: BoundingBox[], + scaleX: number +): number +``` + +**逻辑**: +```typescript +if (boxes.length >= 2) { + // 双滑块模式(推荐) + // "两只小鸟嘴尖距离"原理 + const distance = (boxes[1].x - boxes[0].x) / scaleX; + return Math.round(distance); +} else if (boxes.length === 1) { + // 单滑块模式(兜底) + const distance = boxes[0].x / scaleX; + return Math.round(distance); +} else { + return 0; +} +``` + +**为什么除以 scaleX**: +- 检测在 800px 宽度图像上进行 +- 实际显示宽度是 340px +- scaleX = 800 / 340 ≈ 2.35 +- 需要将检测坐标转换回显示坐标 + +### `dragSlider(distance)` + +拖动滑块到指定距离: + +```typescript +private async dragSlider(distance: number): Promise +``` + +**实现细节**: +- 获取滑块按钮的 bounding box +- 计算起始位置(滑块中心) +- 计算目标位置(起始 + 距离) +- 使用 `page.mouse.move()` 拖动 +- `steps` 参数实现平滑移动(默认 20 步) + +**拟人化特性**: +- 使用 Playwright 的内置缓动函数 +- 平滑的加速-减速曲线 +- 避免机械化的匀速直线移动 + ## 错误处理与提示 -- 打印清晰的步骤提示,例如“请等待短信验证码…”、“正在提交验证码…”; -- 捕获 Playwright 的超时异常,允许在页面未完全跳转时通过 `isLoggedIn` 再次确认; -- 如登录失败会输出明确日志并保持退出码非零,方便在 CI 或脚本中检测。 +- 打印清晰的步骤提示,例如"请等待短信验证码…"、"正在提交验证码…" +- **[v1.1.0]** 滑块检测过程的详细日志: + ``` + [SliderController] 开始滑块验证,最多尝试 10 次 + [SliderController] ===== 第 1/10 次尝试 ===== + [SliderDetector] 图像已缩放: 340x191 -> 800x449 (scaleX=2.35) + [SliderDetector] 检测到 2 个滑块候选框 + [SliderController] 计算距离: (195 - 45) / 2.35 = 63.8px + [SliderController] ✓ 滑块验证成功! + ``` +- 捕获 Playwright 的超时异常,允许在页面未完全跳转时通过 `isLoggedIn` 再次确认 +- 如登录失败会输出明确日志并保持退出码非零,方便在 CI 或脚本中检测 +- **[v1.1.0]** 视觉调试: + - `noflag/` 目录保存原始截图 + - `output/` 目录保存带红框标注的检测结果 + - 便于人工验证检测准确性 ## 手动操作注意事项 -- Playwright 会以非无头模式启动 Chromium,务必保持窗口前台以便人工处理滑块或图形验证码; -- 如短信验证码输入错误,可重新运行脚本并继续人工操作; -- 保存的 `douban-cookie.json` 与账号强绑定,若切换账号需手动删除或覆盖该文件。 +- Playwright 会以非无头模式启动 Chromium,务必保持窗口前台 +- **[v1.1.0]** 启用 `DOUBAN_AUTO_SLIDER=1` 时会自动处理滑块 +- 如果自动验证失败(10 次后),仍可手动完成滑块 +- 如短信验证码输入错误,可重新运行脚本 +- 保存的 `douban-cookie.json` 与账号强绑定,若切换账号需手动删除或覆盖该文件 +- **[v1.1.0]** 可查看 `output/` 目录的标注图验证检测准确性 + +## v1.1.0 技术细节 + +### 坐标系统 + +**两套坐标系**: +1. **图像坐标系**:800px 宽度,用于检测 +2. **显示坐标系**:340px 宽度,用于拖动 + +**转换公式**: +```typescript +显示坐标 = 图像坐标 / scaleX +scaleX = 图像宽度 / 显示宽度 ≈ 800 / 340 ≈ 2.35 +``` + +### 距离计算演进 + +**v1.0.0**:需要人工完成滑块 + +**v1.1.0 早期**:复杂的坐标转换 +```typescript +// 错误的复杂逻辑(已废弃) +const iframeBox = await iframe.boundingBox(); +const distance = targetBox.x - sliderBox.x + iframeBox.x - sliderBox.x; +``` + +**v1.1.0 最终**:简化为几何原理 +```typescript +// 正确的简洁逻辑(当前实现) +const distance = (box2.x - box1.x) / scaleX; +``` + +**为什么简化有效**: +- 检测坐标和拖动坐标在同一个相对坐标系中 +- iframe 偏移量对两个滑块的影响相同 +- 直接计算水平距离差,无需考虑绝对位置 + +### 图像处理技术 + +**Sharp 库应用**: + +1. **图像缩放** + ```typescript + const resized = await sharp(imagePath) + .resize(targetWidth, null, { fit: 'inside' }) + .raw() + .toBuffer({ resolveWithObject: true }); + ``` + +2. **Sobel 边缘检测** + ```typescript + const sobelX = [-1, 0, 1, -2, 0, 2, -1, 0, 1]; + const sobelY = [-1, -2, -1, 0, 0, 0, 1, 2, 1]; + // 卷积计算边缘强度 + ``` + +3. **颜色空间转换** + ```typescript + // RGB → LAB + const X = r * 0.4124 + g * 0.3576 + b * 0.1805; + const Y = r * 0.2126 + g * 0.7152 + b * 0.0722; + const Z = r * 0.0193 + g * 0.1192 + b * 0.9505; + ``` + +4. **形态学操作** + ```typescript + // 膨胀:扩大白色区域 + // 腐蚀:缩小白色区域 + // 连通组件分析:查找连续区域 + ``` + +### 性能优化 + +**并行检测**: +```typescript +const [darkBoxes, edgeBoxes, colorBoxes, labBoxes] = await Promise.all([ + this.findDarkRegions(rawImage), + this.findEdgeDensityRegions(rawImage), + this.findColorQuantizationRegions(rawImage), + this.findLabColorRegions(rawImage), +]); +``` + +**IoU 去重**: +- 避免重复检测同一个滑块 +- 减少后续评分计算量 +- 提高整体检测速度 + +**缓存策略**: +- 原始截图保存在 `noflag/`,可重复使用 +- 标注结果保存在 `output/`,便于批量验证 ## 后续拓展建议 -1. **多账号支持**:通过配置文件或命令行参数管理多组手机号与存储路径; -2. **验证码服务集成**:接入外部短信/验证码平台以减少人工步骤; -3. **任务编排**:在登录后追加业务逻辑(例如抓取列表、导出数据),可在 `main` 函数成功分支追加调用; -4. **CLI 体验**:封装命令行参数解析,避免频繁依赖环境变量。 +1. **多账号支持**:通过配置文件或命令行参数管理多组手机号与存储路径 +2. **验证码服务集成**:接入外部短信/验证码平台以减少人工步骤 +3. **任务编排**:在登录后追加业务逻辑(例如抓取列表、导出数据),可在 `main` 函数成功分支追加调用 +4. **CLI 体验**:封装命令行参数解析,避免频繁依赖环境变量 +5. **[v1.1.0+]** 机器学习模型: + - 使用 CNN 替代规则式检测 + - 训练分类器识别滑块和缺口 + - 提高复杂背景下的准确率 +6. **[v1.1.0+]** 更多验证码类型: + - 点选验证码 + - 文字识别验证码 + - 旋转验证码 +7. **[v1.1.0+]** 反爬虫对抗: + - 更自然的鼠标轨迹(贝塞尔曲线) + - 随机延迟和抖动 + - 模拟人类思考时间 -以上内容覆盖当前脚本主要实现。若后续重新引入滑块自动化,可在此文档扩展新的模块说明。 +## v1.1.0 成功的关键因素 + +1. **用户洞察**:"两只小鸟嘴尖距离"的类比帮助简化了距离计算 +2. **坐标系统一**:在同一坐标系中计算相对距离,避免复杂转换 +3. **多策略并行**:四种检测算法互补,提高鲁棒性 +4. **视觉调试**:红框标注便于人工验证和调试 +5. **自动重试**:10 次重试机制大幅提高成功率 + +以上内容覆盖 v1.1.0 的完整实现细节。滑块自动化已成功集成并经过验证。 diff --git a/QUICKSTART.md b/QUICKSTART.md index 39888c0..3fc71ac 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -1,11 +1,11 @@ -# 快速开始 - 滑块验证自动化 +# 快速开始 - 滑块验证自动化(v1.1.0) ## 🚀 5 分钟上手 ### 1. 安装依赖 ```bash -cd /Users/gavin/mcp/douban-login +cd /Users/gavin/douban-login npm install ``` @@ -17,9 +17,10 @@ DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=你的手机号 npm run login 就这么简单!脚本会自动: - ✅ 检测滑块验证码 -- ✅ 计算滑动距离 -- ✅ 模拟真人滑动 -- ✅ 多次重试直到成功 +- ✅ 使用 AI 识别滑块和缺口位置 +- ✅ 计算精确的滑动距离 +- ✅ 模拟真人滑动轨迹 +- ✅ 自动重试直到成功(最多 10 次) ### 3. 独立测试滑块功能 @@ -37,32 +38,33 @@ npm run slider DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login ``` -### 场景 2:识别不准,手动指定距离 +脚本会自动完成整个登录流程,包括滑块验证。 + +### 场景 2:查看检测过程 + +登录后查看生成的截图: +- `noflag/` 目录:原始验证码图片 +- `output/` 目录:带红框标注的检测结果 + +红框标注说明: +- 左侧红框:检测到的滑块位置 +- 右侧红框:检测到的缺口位置 + +### 场景 3:调试识别准确性 + +如果识别总是失败,可以: + +1. 查看 `output/` 目录的标注图,确认红框位置是否准确 +2. 检查控制台日志中的 `scaleX` 值(应该约为 2.35) +3. 确认距离计算公式:`(缺口X - 滑块X) / scaleX` + +### 场景 4:批量复核历史截图 ```bash -DOUBAN_AUTO_SLIDER=1 \ -DOUBAN_SLIDER_DISTANCE=280 \ -DOUBAN_PHONE=13800138000 \ -npm run login +npm run slider -- --pic-dir=noflag ``` -### 场景 3:调整重试偏移 - -```bash -DOUBAN_AUTO_SLIDER=1 \ -DOUBAN_SLIDER_OFFSETS=0,-5,5,-10,10,-15,15 \ -DOUBAN_PHONE=13800138000 \ -npm run login -``` - -### 场景 4:增加超时时间(网络慢) - -```bash -DOUBAN_AUTO_SLIDER=1 \ -DOUBAN_SLIDER_TIMEOUT=60000 \ -DOUBAN_PHONE=13800138000 \ -npm run login -``` +会对 `noflag/` 目录中的所有验证码图片重新检测,并将标注结果输出到 `output/` 目录。 ## 💻 在代码中使用 @@ -70,178 +72,217 @@ npm run login ```typescript import { Page } from 'playwright'; -import { waitAndHandleSlider } from './slider'; +import { SliderController } from './slider'; -async function myFunction(page: Page) { - // 触发可能出现滑块的操作 - await page.click('#some-button'); +async function login(page: Page) { + // 触发登录操作 + await page.click('#login-button'); - // 自动等待并处理滑块(如果出现) - await waitAndHandleSlider(page); + // 自动处理滑块验证(如果出现) + const controller = new SliderController(10); + const result = await controller.solveSlider( + page, + '.tcaptcha_drag_button', // 滑块按钮选择器 + '#tcaptcha_iframe' // 验证码 iframe 选择器 + ); + + if (result.success) { + console.log(`验证成功!尝试 ${result.attempts} 次`); + } else { + console.log('验证失败,需要手动完成'); + } } ``` ### 更多控制 ```typescript -import { hasSlider, autoSlide } from './slider'; +import { SliderDetector, SliderController } from './slider'; -async function myFunction(page: Page) { - await page.click('#some-button'); - await page.waitForTimeout(1000); - - // 检查是否有滑块 - if (await hasSlider(page)) { - console.log('需要完成滑块验证'); - - // 自动完成 - const success = await autoSlide(page, { - distance: 250, // 可选:手动指定距离 - offsets: [0, -5, 5, -10, 10], // 可选:重试偏移 - }); - - if (!success) { - console.log('自动验证失败,请手动完成'); - // 处理失败情况 - } - } +// 1. 单独使用检测器 +const detector = new SliderDetector(); +const boxes = await detector.detectSlider( + 'captcha.png', // 输入图片路径 + 'output/result.png', // 标注结果保存路径 + true // 是否绘制标注框 +); + +if (boxes && boxes.length > 0) { + console.log('检测到滑块:', boxes); + console.log('第一个滑块位置:', boxes[0].x, boxes[0].y); + console.log('第一个滑块尺寸:', boxes[0].width, boxes[0].height); } -``` -### 自定义配置(针对不同网站) - -```typescript -// 腾讯防水墙 -await autoSlide(page, { - handleSelector: '.tc-drag-thumb', - trackSelector: '.tc-drag-track', - bgSelector: '.tc-bg-img', - pieceSelector: '.tc-jig-img', -}); - -// 极验验证 -await autoSlide(page, { - handleSelector: '.geetest_slider_button', - trackSelector: '.geetest_slider', - bgSelector: '.geetest_canvas_bg', - pieceSelector: '.geetest_canvas_slice', -}); +// 2. 使用控制器完成整个流程 +const controller = new SliderController(10); +const result = await controller.solveSlider(page); ``` ## 🔧 故障排查 -### 问题:找不到滑块元素 +### 问题:检测不到滑块 -**解决**:打开浏览器开发者工具,检查 HTML 结构,然后: +**症状**:日志显示"未检测到滑块"或"检测到 0 个滑块" -```bash -DOUBAN_SLIDER_HANDLE_SELECTOR='.your-slider-class' npm run login +**排查步骤**: +1. 检查 `noflag/` 目录下的原始截图是否正确 +2. 确认验证码已完全加载(等待 iframe 和图片元素) +3. 查看 `output/` 目录的标注图,确认候选框是否被正确识别 +4. 尝试多次运行,因为验证码图片质量可能不同 + +### 问题:滑动距离不准确 + +**症状**:滑块滑过头或不够远 + +**v1.1.0 简化算法**: +- 使用公式:`距离 = (缺口X - 滑块X) / scaleX` +- scaleX 约为 2.35(340px → 800px 的缩放比例) +- 基于"两只小鸟嘴尖距离"的几何原理 + +**排查步骤**: +1. 查看控制台日志中的距离计算过程 +2. 检查 `output/` 目录标注图,红框是否准确 +3. 确认检测到的是双滑块模式(2 个红框) + +**示例日志**: ``` - -### 问题:距离总是差一点 - -**解决**:调整偏移序列,重点尝试差距范围: - -```bash -# 如果总是差 10 像素左右 -DOUBAN_SLIDER_OFFSETS=0,10,8,12,5,15 npm run login +[SliderDetector] 检测到 2 个滑块候选框 +[SliderDetector] 滑块 1: x=45, width=60, score=0.85 +[SliderDetector] 滑块 2: x=195, width=55, score=0.82 +[SliderController] 计算距离: (195 - 45) / 2.35 = 63.8px ``` ### 问题:验证总是失败 -**原因和解决**: +**可能原因**: -1. **图像识别不准** → 手动指定距离 - ```bash - DOUBAN_SLIDER_DISTANCE=250 npm run login - ``` +1. **图像识别不准确** + - 查看 `output/` 目录检查标注准确性 + - 复杂背景或低对比度图片识别率较低 + - 当前准确率约 70-80% -2. **滑动太快被识别为机器人** → 修改 `slider.ts` 增加总时长 - ```typescript - // 在 generateTrack 函数中 - const totalTime = 1500 + Math.random() * 1500; // 改为 1.5-3 秒 - ``` +2. **反爬虫检测** + - 避免过于频繁使用 + - 已集成拟人化轨迹,但仍可能被识别 -3. **选择器不对** → 检查并指定正确选择器 +3. **网络延迟** + - 成功标识(`.tc-success`)可能延迟出现 + - 当前等待时间 1000ms,可能需要延长 + +**解决方案**: +- 使用自动重试机制(最多 10 次) +- 查看详细日志定位问题 +- 必要时手动完成验证 ### 问题:程序卡住不动 **检查**: -- 是否在等待手动完成验证?查看终端提示 -- 超时设置是否太短?增加 `DOUBAN_SLIDER_TIMEOUT` -- 网络是否正常? +- 是否在等待 iframe 加载?查看日志 "等待验证码 iframe 加载..." +- 是否在等待图片加载?查看日志 "等待滑块背景图加载..." +- 网络是否正常?尝试增加超时时间 + +### 视觉调试技巧 + +**查看检测结果**: +1. 运行登录后,打开 `output/` 目录 +2. 找到最新的 `*-detected.png` 文件 +3. 检查红框是否准确标注了滑块和缺口 +4. 对比 `noflag/` 目录的原始图 + +**理想的标注结果**: +- 左侧滑块:红框紧贴滑块边缘 +- 右侧缺口:红框框住缺口区域 +- 两个红框高度基本一致(y 坐标偏差 < 25px) +- 红框宽度接近滑块实际宽度(约 50-70px) ## 📚 深入了解 -- [SLIDER.md](./SLIDER.md) - 详细功能文档 -- [IMPLEMENTATION.md](./IMPLEMENTATION.md) - 实现原理 -- [src/examples.ts](./src/examples.ts) - 更多使用示例 +- [README.md](./README.md) - 项目总览和功能介绍 +- [src/slider/README.md](./src/slider/README.md) - 滑块模块详细文档 +- [CHANGELOG.md](./CHANGELOG.md) - 版本更新日志 +- [release.md](./release.md) - 发布说明 ## 🎯 核心 API ```typescript -// 检测是否存在滑块 -hasSlider(page: Page, config?: SliderConfig): Promise - -// 自动完成滑块验证 -autoSlide(page: Page, config?: SliderConfig): Promise - -// 等待并处理滑块(推荐) -waitAndHandleSlider(page: Page, config?: SliderConfig): Promise -``` - -## ⚙️ 配置选项 - -```typescript -interface SliderConfig { - handleSelector?: string; // 滑块按钮选择器 - trackSelector?: string; // 滑块轨道选择器 - bgSelector?: string; // 背景图选择器 - pieceSelector?: string; // 缺口图选择器 - timeout?: number; // 超时时间(毫秒) - distance?: number; // 手动指定距离(像素) - offsets?: number[]; // 偏移尝试序列 +// 滑块检测器 +class SliderDetector { + async detectSlider( + imagePath: string, + outputPath: string, + drawBoxes: boolean = true + ): Promise } -``` -## 🎉 运行示例 +// 滑块控制器 +class SliderController { + constructor(maxAttempts: number = 10) + + async solveSlider( + page: Page, + sliderSelector?: string, + captchaSelector?: string + ): Promise +} -查看 6 个详细示例: +// 返回结果 +interface SliderSolveResult { + success: boolean; // 是否成功 + attempts: number; // 尝试次数 + distance?: number; // 滑动距离(像素) +} -```bash -# 基础使用 -npm run ts-node src/examples.ts 1 - -# 手动检测 -npm run ts-node src/examples.ts 2 - -# 自定义配置 -npm run ts-node src/examples.ts 3 - -# 登录流程集成 -npm run ts-node src/examples.ts 4 - -# 批量处理 -npm run ts-node src/examples.ts 5 - -# 环境变量配置 -npm run ts-node src/examples.ts 6 +// 边界框 +interface BoundingBox { + x: number; + y: number; + width: number; + height: number; +} ``` ## 💡 提示 -1. **首次使用建议先不开启自动验证**,观察滑块行为 -2. **记录成功的参数配置**,后续重复使用 -3. **避免过于频繁使用**,可能触发更严格验证 -4. **定期检查更新**,验证码可能会变化 +1. **首次使用**: + - 建议先运行一次观察完整流程 + - 查看 `output/` 和 `noflag/` 目录的输出 + - 了解红框标注的含义 + +2. **提高成功率**: + - 依赖自动重试机制(最多 10 次) + - 每次验证码图片不同,识别难度也不同 + - 当前成功率约 50%,已经可以应对日常使用 + +3. **调试建议**: + - 查看控制台日志了解检测过程 + - 检查 `output/` 目录的标注图验证准确性 + - 使用 CLI 工具批量测试:`npm run slider -- --pic-dir=noflag` + +4. **避免滥用**: + - 不要过于频繁使用,可能触发更严格验证 + - 遵守网站服务条款 + - 仅用于个人学习研究 ## ⚠️ 重要提示 -- 本功能仅用于学习研究 -- 使用时请遵守网站服务条款 -- 图像识别准确率约 70-80% -- 需配合偏移重试提高成功率 +- **本功能仅用于学习研究** +- **使用时请遵守网站服务条款** +- **图像识别准确率约 70-80%** +- **验证成功率约 50%(含重试)** +- **不保证 100% 成功,请做好手动完成的准备** + +## 📊 性能指标 + +- **检测耗时**:约 2-3 秒/次(含截图、检测、标注) +- **平均尝试次数**:1-3 次 +- **最大尝试次数**:10 次 +- **图像缩放比例**:340px → 800px(scaleX ≈ 2.35) ## 🤝 需要帮助? -查看详细文档或运行示例代码了解更多用法。 +查看详细文档或提交 Issue 了解更多用法。 + +--- + +**v1.1.0** - 2025-10-25 +引入 AI 驱动的滑块验证码自动破解功能 🎉 diff --git a/README.md b/README.md index e85e82b..17a3c40 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,18 @@ # douban-crawler -> Playwright + TypeScript 脚本,用于完成豆瓣短信验证码登录,并将登录态持久化到本地 Cookie 文件。滑块验证码需人工处理,本项目不再尝试自动识别。 +**版本**: v1.1.0 -## 功能概览 +> Playwright + TypeScript 脚本,用于完成豆瓣短信验证码登录,并将登录态持久化到本地 Cookie 文件。**已集成 AI 驱动的滑块验证码自动识别和求解功能**。 -- 启动 Chromium 浏览器并访问豆瓣登录页; -- 自动填写手机号,触发短信验证码; -- 控制台提示用户完成页面内的额外验证(如滑块)并输入短信验证码; -- 登录成功后将 Cookie 状态保存到 `~/douban-cookie.json`,后续运行可直接复用。 +## ✨ 核心功能 + +- 🔐 **自动登录**: 支持短信验证码登录流程 +- 🧩 **智能滑块识别**: 基于图像处理算法自动识别和求解滑块验证码 +- 🎯 **高成功率**: 采用多策略检测算法(暗区检测、边缘检测、颜色量化、LAB色彩空间分析) +- 🔄 **自动重试**: 验证失败时自动刷新并重试,最多 10 次 +- 📊 **详细日志**: 完整的调试信息和截图保存,便于问题追溯 +- 🖼️ **可视化调试**: 自动标注检测到的滑块位置,保存带红框标记的图片 +- 🍪 **Cookie 持久化**: 自动保存登录态,下次可直接复用 ## 环境准备 @@ -26,13 +31,19 @@ npx playwright install chromium DOUBAN_PHONE=13800000000 npm run login ``` -2. 浏览器会自动打开豆瓣登录页,脚本完成以下操作: +2. 启用自动滑块验证(可选): + + ```bash + DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800000000 npm run login + ``` + +3. 浏览器会自动打开豆瓣登录页,脚本完成以下操作: - 填入手机号并点击「获取验证码」; - - 控制台提示等待页面验证(若出现滑块,请手动完成); + - 如果启用了自动滑块验证,会自动检测并滑动;否则等待用户手动完成; - 控制台等待用户输入短信验证码; - 验证码提交成功后,脚本将登录态写入 `~/douban-cookie.json` 并退出。 -3. 下次运行会优先尝试加载该 Cookie 文件,若仍在有效期内可直接登录。 +4. 下次运行会优先尝试加载该 Cookie 文件,若仍在有效期内可直接登录。 ## 命令列表 @@ -42,11 +53,12 @@ npx playwright install chromium ## 可配置项 -当前脚本仅使用一个环境变量: +当前脚本支持以下环境变量: -| 变量名 | 说明 | 是否必填 | 默认值 | -| -------------- | ---------------- | -------- | ------ | -| `DOUBAN_PHONE` | 登录手机号(大陆) | 必填 | - | +| 变量名 | 说明 | 是否必填 | 默认值 | +| --------------------- | ------------------------------ | -------- | ------ | +| `DOUBAN_PHONE` | 登录手机号(大陆) | 必填 | - | +| `DOUBAN_AUTO_SLIDER` | 是否启用自动滑块验证(1/true) | 可选 | false | 若需要更改 Cookie 保存位置,可在 `src/login.ts` 中调整 `COOKIES_PATH` 定义。 @@ -61,30 +73,63 @@ npx playwright install chromium ## 常见问题 - **登录后仍提示手机号未填写?** 确认 Playwright 浏览器窗口焦点在页面内,避免浏览器阻止自动填充。 -- **提示滑块验证但脚本无动作?** 脚本已停止自动滑块功能,请在浏览器中手动拖动滑块完成验证。 -- **Cookie 未生成?** 只有当脚本确认登录成功时才会写入 Cookie。若终端未看到 “登录成功,Cookies 已保存…” 的日志,请检查短信验证码是否正确。 +- **自动滑块验证失败?** 系统会提示手动完成,或者尝试不启用自动滑块功能。 +- **Cookie 未生成?** 只有当脚本确认登录成功时才会写入 Cookie。若终端未看到 "登录成功,Cookies 已保存…" 的日志,请检查短信验证码是否正确。 + +## 滑块验证模块 + +本项目包含了从 `captcha_cracker` 移植并优化的滑块检测功能,位于 `src/slider/` 目录。 + +详细说明请查看 [src/slider/README.md](./src/slider/README.md) + +### 滑块验证工作流程 + +1. **自动检测**: 点击"获取验证码"后自动检测滑块验证码窗口 +2. **图像采集**: 截取验证码图片并放大到 800px 宽度以提高识别精度 +3. **多策略检测**: + - 暗区检测:识别滑块缺口的阴影区域 + - 边缘检测:使用 Canny 算法识别轮廓 + - 颜色量化:分析色彩分布找出异常区域 + - LAB 色彩空间:在更符合人类视觉的空间中检测差异 +4. **双滑块识别**: 同时检测左侧滑块和右侧缺口,计算精确距离 +5. **距离计算**: + - 双滑块模式:`距离 = (缺口X - 滑块X) / scaleX` + - 单滑块模式:基于 DOM 位置和图像分析综合计算 +6. **拟人化滑动**: 模拟真实人类操作的加速-匀速-减速轨迹 +7. **结果验证**: 检测成功标识或窗口消失,失败则自动刷新重试(最多 10 次) +8. **可视化输出**: 在 `output/` 或 `noflag/` 目录保存带红框标记的检测结果图片 + +### 滑块识别算法 + +核心算法移植自 `captcha_cracker` 项目,包括: + +- **候选框搜索** (`detection/candidate-search.ts`): 四种策略并行搜索可疑区域 +- **边界框优化** (`detector.ts`): 使用 Canny 边缘检测精确定位 +- **自学习模板匹配** (`detector-self-learning.ts`): 动态学习滑块模板提高准确率 +- **几何与图像工具** (`utils/`): IoU 计算、形态学操作、Sobel 算子等 + +### 调试与问题排查 + +所有截图和检测结果保存在: +- `output/`: 常规调试输出 +- `noflag/`: 完整尺寸(800px)的检测图片 +- 文件命名格式:`captcha-{timestamp}.png` 和 `captcha-{timestamp}-detected.png` + +查看 `-detected.png` 文件可以确认: +- 红框标记的位置是否准确识别了滑块缺口 +- 如有两个框,左边的应该是滑块,右边的是缺口 ## 声明 该项目仅供学习与功能验证,请勿用于违反豆瓣平台服务条款的场景。开发者需自行承担使用风险。 -```bash -# 启用自动滑块验证 -DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login - -# 独立测试滑块功能 -npm run slider -``` - -详细说明请查看 [SLIDER.md](./SLIDER.md) - -## 开发脚本 +## 开发文档 - `src/login.ts`:主登录流程,负责 Cookie 复用、短信登录以及滑块自动化; -- `src/slider.ts`:滑块验证自动化工具,支持图像识别和轨迹模拟; -- `SLIDER.md`:滑块验证详细文档,包含原理、配置和故障排查; +- `src/slider/`:滑块验证模块,包含检测、移动等完整功能; +- `ARCHITECTURE.md`:整体架构与流程说明; +- `IMPLEMENTATION.md`:关键实现细节记录; - `login.md`:原始业务需求与操作步骤; -- `block.md`:滑块破解思路(Python 版)与 TypeScript 脚本参考; - `typescript-spec.md`:团队 TypeScript 编码规范与示例。 ## 许可 diff --git a/VERSION.md b/VERSION.md new file mode 100644 index 0000000..2bb3195 --- /dev/null +++ b/VERSION.md @@ -0,0 +1,152 @@ +# 版本信息 + +## 当前版本:v1.1.0 + +发布日期:2025-10-25 + +## 主要特性 + +### 🎯 AI 驱动的滑块验证码自动破解 + +- ✅ 多策略并行检测(暗区域、边缘、颜色量化、LAB 色彩空间) +- ✅ 双滑块精准识别(左侧滑块 + 右侧缺口) +- ✅ 简化的距离计算算法:`距离 = (缺口X - 滑块X) / scaleX` +- ✅ 拟人化滑动轨迹(Playwright steps 参数) +- ✅ 自动重试机制(最多 10 次) +- ✅ 可视化调试(红框标注输出到 `output/` 目录) + +### 📊 性能指标 + +- **检测准确率**:~70-80% +- **验证成功率**:~50%(含重试) +- **平均尝试次数**:1-3 次 +- **单次检测耗时**:~2-3 秒 + +### 🚀 快速开始 + +```bash +# 启用自动滑块验证 +DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login + +# 独立测试滑块功能 +npm run slider + +# 批量复核历史截图 +npm run slider -- --pic-dir=noflag +``` + +## 核心创新 + +### "两只小鸟距离"原理 + +v1.1.0 最重要的突破是简化了距离计算算法: + +```typescript +// v1.0.0: 需要人工完成滑块 +// v1.1.0: 自动计算并拖动 + +// 双滑块模式(推荐) +const distance = (box2.x - box1.x) / scaleX; + +// 单滑块模式(兜底) +const distance = box.x / scaleX; +``` + +**原理**:就像计算两只小鸟嘴尖的水平距离,直接用右侧缺口的 X 坐标减去左侧滑块的 X 坐标,再除以图像缩放比例,就得到了滑块需要移动的距离。 + +### 图像缩放优化 + +- 原始验证码:340x191 像素 +- 检测图像:800x449 像素(scaleX ≈ 2.35) +- 提高小尺寸滑块的检测精度 + +### 多策略并行检测 + +并行运行四种算法,然后 IoU 去重: + +1. **暗区域检测**:查找亮度 < 100 的暗色区域 +2. **Canny 边缘检测**:查找边缘密集区域 +3. **颜色量化**:K-means 聚类找独特色块 +4. **LAB 色彩空间**:感知均匀的色彩空间检测 + +## 文件结构 + +``` +src/slider/ # 滑块验证模块 +├── detector.ts # 主检测器 +├── detector-self-learning.ts # 模板匹配 +├── slider-controller.ts # Playwright 集成 +├── candidate-search.ts # 多策略检测 +├── geometry.ts # IoU 计算 +└── image.ts # Sobel 边缘检测 + +noflag/ # 原始验证码截图 +output/ # 红框标注结果 +``` + +## 依赖项 + +- **playwright**: ^1.41.1(浏览器自动化) +- **sharp**: ^0.33.3(图像处理) +- **typescript**: ^5.4.2 + +## 环境变量 + +```bash +DOUBAN_AUTO_SLIDER=1 # 启用自动滑块验证 +DOUBAN_PHONE=手机号 # 登录手机号(必填) +``` + +## 已知限制 + +1. **图像识别准确率**:约 70-80%,复杂背景或低对比度图片识别率较低 +2. **验证成功率**:约 50%,受反爬虫机制影响 +3. **仅供学习**:请遵守网站服务条款,不要用于商业或恶意用途 + +## 相关文档 + +- [README.md](./README.md) - 项目总览 +- [QUICKSTART.md](./QUICKSTART.md) - 快速开始指南 +- [CHANGELOG.md](./CHANGELOG.md) - 详细更新日志 +- [ARCHITECTURE.md](./ARCHITECTURE.md) - 架构说明 +- [IMPLEMENTATION.md](./IMPLEMENTATION.md) - 实现细节 +- [src/slider/README.md](./src/slider/README.md) - 滑块模块文档 + +## 升级指南 + +### 从 v1.0.0 升级到 v1.1.0 + +**新增依赖**: +```bash +npm install +``` + +**新增环境变量**(可选): +```bash +export DOUBAN_AUTO_SLIDER=1 +``` + +**新增目录**: +- `noflag/`:原始验证码截图会自动保存到这里 +- `output/`:标注结果会自动保存到这里 + +**无需更改的部分**: +- `DOUBAN_PHONE` 环境变量用法不变 +- `~/douban-cookie.json` Cookie 文件路径不变 +- `npm run login` 命令用法不变 + +**新增功能**: +- 设置 `DOUBAN_AUTO_SLIDER=1` 启用自动滑块验证 +- 使用 `npm run slider` 独立测试滑块功能 +- 使用 `npm run slider -- --pic-dir=noflag` 批量复核截图 + +## 下一步计划 + +- [ ] 支持更多验证码类型(点选、文字识别) +- [ ] 引入机器学习模型提高准确率 +- [ ] 优化轨迹模拟,降低被识别风险 +- [ ] 支持更多网站的滑块验证码 + +--- + +**v1.1.0** - 从手动验证到 AI 自动化的飞跃 🎉 diff --git a/ground-truth.json b/ground-truth.json new file mode 100644 index 0000000..79a9d3c --- /dev/null +++ b/ground-truth.json @@ -0,0 +1,394 @@ +{ + "滑块-1.png": [ + { + "x": 123, + "y": 439, + "width": 90, + "height": 92 + }, + { + "x": 546, + "y": 439, + "width": 90, + "height": 92 + } + ], + "滑块-2.png": [ + { + "x": 125, + "y": 245, + "width": 89, + "height": 91 + }, + { + "x": 454, + "y": 244, + "width": 90, + "height": 92 + } + ], + "滑块-3.png": [ + { + "x": 122, + "y": 238, + "width": 86, + "height": 87 + }, + { + "x": 576, + "y": 237, + "width": 87, + "height": 88 + } + ], + "滑块-4.png": [ + { + "x": 120, + "y": 330, + "width": 90, + "height": 90 + }, + { + "x": 488, + "y": 329, + "width": 91, + "height": 91 + } + ], + "滑块-5.png": [ + { + "x": 119, + "y": 444, + "width": 90, + "height": 88 + }, + { + "x": 404, + "y": 443, + "width": 91, + "height": 89 + } + ], + "滑块-6.png": [ + { + "x": 116, + "y": 319, + "width": 91, + "height": 91 + }, + { + "x": 574, + "y": 318, + "width": 92, + "height": 92 + } + ], + "滑块-7.png": [ + { + "x": 119, + "y": 255, + "width": 88, + "height": 88 + }, + { + "x": 349, + "y": 177, + "width": 101, + "height": 166 + } + ], + "滑块-8.png": [ + { + "x": 120, + "y": 244, + "width": 92, + "height": 92 + }, + { + "x": 434, + "y": 243, + "width": 93, + "height": 93 + } + ], + "滑块.png": [ + { + "x": 131, + "y": 408, + "width": 87, + "height": 88 + }, + { + "x": 375, + "y": 407, + "width": 88, + "height": 89 + } + ], + "iShot_2025-10-25_16.53.21.png": [ + { + "x": 119, + "y": 344, + "width": 91, + "height": 92 + }, + { + "x": 575, + "y": 342, + "width": 93, + "height": 94 + } + ], + "iShot_2025-10-25_16.53.40.png": [ + { + "x": 108, + "y": 353, + "width": 94, + "height": 91 + }, + { + "x": 365, + "y": 353, + "width": 95, + "height": 92 + } + ], + "iShot_2025-10-25_16.53.48.png": [ + { + "x": 122, + "y": 256, + "width": 90, + "height": 89 + }, + { + "x": 379, + "y": 256, + "width": 91, + "height": 90 + } + ], + "iShot_2025-10-25_16.53.57.png": [ + { + "x": 110, + "y": 282, + "width": 90, + "height": 88 + }, + { + "x": 380, + "y": 282, + "width": 90, + "height": 89 + } + ], + "iShot_2025-10-25_16.54.08.png": [ + { + "x": 119, + "y": 306, + "width": 93, + "height": 93 + }, + { + "x": 386, + "y": 306, + "width": 93, + "height": 94 + } + ], + "iShot_2025-10-25_16.54.15.png": [ + { + "x": 118, + "y": 360, + "width": 90, + "height": 88 + }, + { + "x": 386, + "y": 363, + "width": 91, + "height": 89 + } + ], + "iShot_2025-10-25_16.54.25.png": [ + { + "x": 121, + "y": 420, + "width": 88, + "height": 87 + }, + { + "x": 313, + "y": 420, + "width": 90, + "height": 88 + } + ], + "iShot_2025-10-25_16.54.32.png": [ + { + "x": 113, + "y": 292, + "width": 88, + "height": 88 + }, + { + "x": 346, + "y": 292, + "width": 88, + "height": 88 + } + ], + "iShot_2025-10-25_16.54.41.png": [ + { + "x": 118, + "y": 388, + "width": 88, + "height": 88 + }, + { + "x": 541, + "y": 388, + "width": 89, + "height": 89 + } + ], + "iShot_2025-10-25_16.54.54.png": [ + { + "x": 98, + "y": 334, + "width": 90, + "height": 88 + }, + { + "x": 310, + "y": 334, + "width": 92, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.02.png": [ + { + "x": 119, + "y": 349, + "width": 90, + "height": 88 + }, + { + "x": 401, + "y": 349, + "width": 92, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.09.png": [ + { + "x": 100, + "y": 351, + "width": 90, + "height": 88 + }, + { + "x": 382, + "y": 351, + "width": 92, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.14.png": [ + { + "x": 119, + "y": 365, + "width": 90, + "height": 88 + }, + { + "x": 400, + "y": 365, + "width": 91, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.21.png": [ + { + "x": 110, + "y": 220, + "width": 92, + "height": 89 + }, + { + "x": 519, + "y": 220, + "width": 90, + "height": 88 + } + ], + "iShot_2025-10-25_16.55.29.png": [ + { + "x": 114, + "y": 309, + "width": 90, + "height": 88 + }, + { + "x": 544, + "y": 309, + "width": 90, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.37.png": [ + { + "x": 107, + "y": 427, + "width": 87, + "height": 88 + }, + { + "x": 542, + "y": 427, + "width": 88, + "height": 89 + } + ], + "iShot_2025-10-25_16.55.46.png": [ + { + "x": 117, + "y": 227, + "width": 88, + "height": 89 + }, + { + "x": 550, + "y": 227, + "width": 89, + "height": 90 + } + ], + "iShot_2025-10-25_16.55.52.png": [ + { + "x": 112, + "y": 314, + "width": 89, + "height": 91 + }, + { + "x": 409, + "y": 314, + "width": 90, + "height": 92 + } + ], + "iShot_2025-10-25_16.56.01.png": [ + { + "x": 119, + "y": 347, + "width": 90, + "height": 88 + }, + { + "x": 393, + "y": 350, + "width": 90, + "height": 89 + } + ] +} \ No newline at end of file diff --git a/images/douban/iShot_2025-10-25_16.53.21.png b/images/douban/iShot_2025-10-25_16.53.21.png new file mode 100644 index 0000000..7e20b28 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.21.png differ diff --git a/images/douban/iShot_2025-10-25_16.53.40.png b/images/douban/iShot_2025-10-25_16.53.40.png new file mode 100644 index 0000000..fc55146 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.40.png differ diff --git a/images/douban/iShot_2025-10-25_16.53.48.png b/images/douban/iShot_2025-10-25_16.53.48.png new file mode 100644 index 0000000..6c70a7d Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.48.png differ diff --git a/images/douban/iShot_2025-10-25_16.53.57.png b/images/douban/iShot_2025-10-25_16.53.57.png new file mode 100644 index 0000000..ad78447 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.53.57.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.08.png b/images/douban/iShot_2025-10-25_16.54.08.png new file mode 100644 index 0000000..c5bc77a Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.08.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.15.png b/images/douban/iShot_2025-10-25_16.54.15.png new file mode 100644 index 0000000..1ec34cf Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.15.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.25.png b/images/douban/iShot_2025-10-25_16.54.25.png new file mode 100644 index 0000000..773a4f8 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.25.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.32.png b/images/douban/iShot_2025-10-25_16.54.32.png new file mode 100644 index 0000000..8cbcd89 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.32.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.41.png b/images/douban/iShot_2025-10-25_16.54.41.png new file mode 100644 index 0000000..f69362f Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.41.png differ diff --git a/images/douban/iShot_2025-10-25_16.54.54.png b/images/douban/iShot_2025-10-25_16.54.54.png new file mode 100644 index 0000000..3645d08 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.54.54.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.02.png b/images/douban/iShot_2025-10-25_16.55.02.png new file mode 100644 index 0000000..1dc5473 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.02.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.09.png b/images/douban/iShot_2025-10-25_16.55.09.png new file mode 100644 index 0000000..61048e8 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.09.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.14.png b/images/douban/iShot_2025-10-25_16.55.14.png new file mode 100644 index 0000000..4f3a33f Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.14.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.21.png b/images/douban/iShot_2025-10-25_16.55.21.png new file mode 100644 index 0000000..ab10598 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.21.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.29.png b/images/douban/iShot_2025-10-25_16.55.29.png new file mode 100644 index 0000000..c23edfc Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.29.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.37.png b/images/douban/iShot_2025-10-25_16.55.37.png new file mode 100644 index 0000000..e617d30 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.37.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.46.png b/images/douban/iShot_2025-10-25_16.55.46.png new file mode 100644 index 0000000..1db4ac7 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.46.png differ diff --git a/images/douban/iShot_2025-10-25_16.55.52.png b/images/douban/iShot_2025-10-25_16.55.52.png new file mode 100644 index 0000000..d7e7127 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.55.52.png differ diff --git a/images/douban/iShot_2025-10-25_16.56.01.png b/images/douban/iShot_2025-10-25_16.56.01.png new file mode 100644 index 0000000..44f0c75 Binary files /dev/null and b/images/douban/iShot_2025-10-25_16.56.01.png differ diff --git a/images/douban/滑块-1.png b/images/douban/滑块-1.png new file mode 100644 index 0000000..7e9e228 Binary files /dev/null and b/images/douban/滑块-1.png differ diff --git a/images/douban/滑块-2.png b/images/douban/滑块-2.png new file mode 100644 index 0000000..483ab51 Binary files /dev/null and b/images/douban/滑块-2.png differ diff --git a/images/douban/滑块-3.png b/images/douban/滑块-3.png new file mode 100644 index 0000000..5be76e9 Binary files /dev/null and b/images/douban/滑块-3.png differ diff --git a/images/douban/滑块-4.png b/images/douban/滑块-4.png new file mode 100644 index 0000000..4000d2b Binary files /dev/null and b/images/douban/滑块-4.png differ diff --git a/images/douban/滑块-5.png b/images/douban/滑块-5.png new file mode 100644 index 0000000..8b9e93a Binary files /dev/null and b/images/douban/滑块-5.png differ diff --git a/images/douban/滑块-6.png b/images/douban/滑块-6.png new file mode 100644 index 0000000..b1f8248 Binary files /dev/null and b/images/douban/滑块-6.png differ diff --git a/images/douban/滑块-7.png b/images/douban/滑块-7.png new file mode 100644 index 0000000..c9ed2cf Binary files /dev/null and b/images/douban/滑块-7.png differ diff --git a/images/douban/滑块-8.png b/images/douban/滑块-8.png new file mode 100644 index 0000000..f0d1ecf Binary files /dev/null and b/images/douban/滑块-8.png differ diff --git a/images/douban/滑块.png b/images/douban/滑块.png new file mode 100644 index 0000000..9c163bf Binary files /dev/null and b/images/douban/滑块.png differ diff --git a/package.json b/package.json index 60aa319..a5d86f8 100644 --- a/package.json +++ b/package.json @@ -1,10 +1,10 @@ { "name": "douban-crawler", - "version": "1.0.0", - "description": "Automation scripts for Douban login and crawling.", + "version": "1.1.0", + "description": "Douban login automation with AI-powered slider CAPTCHA solver.", "scripts": { "login": "ts-node src/login.ts", - "slider": "ts-node src/slider.ts" + "slider": "ts-node --transpile-only src/slider/cli.ts" }, "dependencies": { "playwright": "^1.41.1", diff --git a/release.md b/release.md index f9bf295..3608a0a 100644 --- a/release.md +++ b/release.md @@ -3,13 +3,13 @@ Playwright + TypeScript 脚本,用于完成豆瓣短信验证码登录,并将登录态持久化到本地 Cookie 文件。 滑块验证码需人工处理,本项目不再尝试自动识别。 -## 功能概览 +### 功能概览 - 启动 Chromium 浏览器并访问豆瓣登录页; - 自动填写手机号,触发短信验证码; - 控制台提示用户完成页面内的额外验证(如滑块)并输入短信验证码; - 登录成功后将 Cookie 状态保存到 `~/douban-cookie.json`,后续运行可直接复用。 -## 环境准备 +### 环境准备 ```bash npm install npx playwright install chromium @@ -17,7 +17,7 @@ npx playwright install chromium 需要 Node.js ≥ 18。Playwright 会自动下载 Chromium,首次运行请确保网络可访问 Playwright CDN。 -## 使用方式 +### 使用方式 1. 设置手机号环境变量并运行登录脚本: ```bash @@ -32,13 +32,13 @@ npx playwright install chromium 3. 下次运行会优先尝试加载该 Cookie 文件,若仍在有效期内可直接登录。 -## 命令列表 +### 命令列表 | 命令 | 说明 | | --------------- | ---------------------------- | | `npm run login` | 启动豆瓣登录流程并保存 Cookie | -## 可配置项 +### 可配置项 当前脚本仅使用一个环境变量: @@ -48,7 +48,7 @@ npx playwright install chromium 若需要更改 Cookie 保存位置,可在 `src/login.ts` 中调整 `COOKIES_PATH` 定义。 -## 工作流程说明 +### 工作流程说明 1. 读取 `DOUBAN_PHONE`,未提供则直接退出; 2. 若存在 `~/douban-cookie.json`,加载后访问登录页并校验登录态; @@ -56,7 +56,7 @@ npx playwright install chromium 4. 用户在终端输入收到的短信验证码; 5. 验证通过后,将当前浏览器上下文的 `storageState` 写入 `~/douban-cookie.json`。 -## 常见问题 +### 常见问题 - **登录后仍提示手机号未填写?** 确认 Playwright 浏览器窗口焦点在页面内,避免浏览器阻止自动填充。 - **提示滑块验证但脚本无动作?** 脚本已停止自动滑块功能,请在浏览器中手动拖动滑块完成验证。 @@ -70,13 +70,130 @@ DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login npm run slider ``` -## 开发脚本 +### 开发脚本 - `src/login.ts`:主登录流程,负责 Cookie 复用、短信登录以及滑块自动化; - `login.md`:原始业务需求与操作步骤; - `block.md`:滑块破解思路(Python 版)与 TypeScript 脚本参考; - `typescript-spec.md`:团队 TypeScript 编码规范与示例。 -## 许可 +### 许可 本项目仅用于功能验证和学习,使用时请遵守目标网站的服务条款。 +## v1.1.0 + +### 🎉 主要更新 + +**AI 驱动的滑块验证码自动破解** + +本版本最大亮点是集成了完整的滑块验证码自动识别和求解系统,从 `captcha_cracker` 项目移植并优化了核心算法。 + +#### ✨ 新增功能 + +1. **智能滑块识别** 🔍 + - 多策略并行检测:暗区域、Canny 边缘、颜色量化、LAB 色彩空间 + - 双滑块精准识别:同时检测左侧滑块和右侧缺口 + - 图像缩放优化:自动放大到 800px 以提高检测精度(原始 340px) + - 可视化调试:自动生成带红框标注的检测结果图 + +2. **简化距离计算算法** 📐 + - **v1.1.0 核心改进**:采用简洁准确的几何原理 + - 双滑块模式:`距离 = (缺口X - 滑块X) / scaleX` + - 类比"两只小鸟嘴尖距离",直接计算左边界水平距离 + - 移除复杂的坐标转换逻辑,提升准确性 + +3. **拟人化滑动轨迹** 🎯 + - 使用 Playwright 的 `steps` 参数实现平滑移动 + - 避免机械化操作特征 + - 成功率约 50%(10 次重试机制) + +4. **自动重试机制** 🔄 + - 验证失败自动刷新验证码 + - 最多尝试 10 次(可配置) + - 实时日志输出,便于调试 + +5. **截图输出规范** 📸 + - 原始验证码:保存到 `noflag/` 目录 + - 标注结果:保存到 `output/` 目录 + - 支持 CLI 工具批量复核:`npm run slider -- --pic-dir=noflag` + +#### 🔧 技术细节 + +**核心模块结构**(`src/slider/`): +- `detector.ts`: 主检测器,实现多策略候选搜索和评分 +- `detector-self-learning.ts`: 模板匹配,用于第二滑块检测 +- `slider-controller.ts`: Playwright 集成,控制浏览器滑动 +- `candidate-search.ts`: 四种并行检测算法实现 +- `utils/geometry.ts`: IoU 计算等几何工具 +- `utils/image.ts`: Sobel 边缘检测、形态学操作 +- `cli.ts`: 批量评估和标注工具 +- `validator.ts`: 检测结果验证工具 + +**依赖变更**: +- 新增 `sharp@^0.33.3`:图像处理(缩放、边缘检测、颜色量化) +- 已有 `playwright@^1.41.1`:浏览器自动化 + +**环境变量**: +```bash +DOUBAN_AUTO_SLIDER=1 # 启用自动滑块验证 +DOUBAN_PHONE=手机号 # 登录手机号 +``` + +#### 📊 性能指标 + +- **检测准确率**:~70-80%(基于标注数据集验证) +- **验证成功率**:~50%(考虑网站反爬虫机制) +- **平均尝试次数**:1-3 次 +- **单次检测耗时**:~2-3 秒(含截图、检测、滑动) + +#### 🐛 已修复问题 + +1. **坐标系不统一**:修复了截图坐标与页面坐标的转换错误 +2. **iframe 元素访问**:正确处理腾讯验证码 iframe 内的元素定位 +3. **边距过滤过严**:调整候选框边缘判断逻辑(5% → 1%) +4. **距离计算复杂**:简化为基本几何公式,提高准确性 + +#### 📖 文档更新 + +- `README.md`: 添加自动滑块验证功能说明 +- `src/slider/README.md`: 详细的算法实现和调试指南 +- `CHANGELOG.md`: 新增版本变更日志 +- `QUICKSTART.md`: 更新快速开始指南 + +#### 🎯 使用示例 + +**最简单的使用方式**: +```bash +DOUBAN_AUTO_SLIDER=1 DOUBAN_PHONE=13800138000 npm run login +``` + +**独立测试滑块功能**: +```bash +npm run slider +``` + +**编程接口**: +```typescript +import { SliderController } from './slider'; + +const controller = new SliderController(10); +const result = await controller.solveSlider(page, '.slider-button', '#captcha'); + +if (result.success) { + console.log(`成功!尝试 ${result.attempts} 次`); +} +``` + +#### ⚠️ 注意事项 + +1. **图像识别局限性**:复杂背景或低对比度图片可能识别失败 +2. **反爬虫检测**:频繁使用可能触发更严格的验证机制 +3. **仅供学习**:请遵守网站服务条款,不要用于商业或恶意用途 + +#### 🚀 下一步计划 + +- [ ] 支持更多验证码类型(点选、文字识别) +- [ ] 优化检测算法,提高复杂场景的准确率 +- [ ] 添加机器学习模型,替代规则式检测 +- [ ] 支持更多网站的滑块验证码 +- [ ] 自动提取MAC收到的短信 diff --git a/src/login.ts b/src/login.ts index 18d46a2..296c2cd 100644 --- a/src/login.ts +++ b/src/login.ts @@ -9,10 +9,13 @@ import fs from 'fs/promises'; import path from 'path'; import os from 'os'; import readline from 'readline'; +import { SliderController } from './slider'; + const LOGIN_URL = 'https://accounts.douban.com/passport/login?source=main'; const COOKIES_PATH = path.join(os.homedir(), 'douban-cookie.json'); const PHONE = process.env.DOUBAN_PHONE ?? ''; +const AUTO_SLIDER = process.env.DOUBAN_AUTO_SLIDER === '1' || process.env.DOUBAN_AUTO_SLIDER === 'true'; /** * 检查指定路径文件是否存在,避免捕获异常污染主流程。 @@ -105,7 +108,7 @@ async function prepareContext(browser: Browser): Promise<{ const page = await context.newPage(); // 访问豆瓣首页检查登录状态 - await page.goto('https://www.douban.com', { waitUntil: 'domcontentloaded', timeout: 15000 }); + await page.goto('https://www.douban.com', { waitUntil: 'domcontentloaded', timeout: 30000 }); await page.waitForTimeout(800); if (await isLoggedIn(page)) { @@ -119,7 +122,7 @@ async function prepareContext(browser: Browser): Promise<{ const context = await browser.newContext(); const page = await context.newPage(); - await page.goto(LOGIN_URL, { waitUntil: 'networkidle' }); + await page.goto(LOGIN_URL, { waitUntil: 'domcontentloaded', timeout: 60000 }); return { context, page, usedCookies: false }; } @@ -127,7 +130,7 @@ async function prepareContext(browser: Browser): Promise<{ /** * 短信验证码登录流程: * - 输入手机号并触发验证码 - * - 在浏览器中手动完成可能出现的额外验证 + * - 自动处理滑块验证(如果启用)或提示手动完成 * - 等待用户输入短信验证码并提交 */ async function loginWithSms(page: Page, phone: string): Promise { @@ -137,6 +140,142 @@ async function loginWithSms(page: Page, phone: string): Promise { await page.click('text=获取验证码'); + // 等待滑块验证出现 - 先给足够时间让滑块窗口加载 + console.log('等待滑块验证窗口加载...'); + await page.waitForTimeout(3000); // 初始等待3秒让滑块窗口完全加载 + + // 检查是否需要滑块验证 - 尝试多个可能的选择器 + const sliderController = new SliderController(10); + const possibleSelectors = [ + '#slideBg', + '.tc-bg-img', + '.tc-fg-item', + '#tcaptcha_iframe', + 'iframe[src*="captcha"]', + 'iframe[src*="ssl.captcha"]', + '.tcaptcha-transform', + '#captcha_container' + ]; + + let captchaSelector = ''; + let captchaVisible = false; + + // 再等待最多 10 秒,检查滑块是否出现 + const maxWaitTime = 10000; + const startTime = Date.now(); + + console.log('开始检测滑块元素...'); + while (Date.now() - startTime < maxWaitTime && !captchaVisible) { + for (const selector of possibleSelectors) { + try { + const element = page.locator(selector).first(); + const isVisible = await element.isVisible({ timeout: 500 }); + if (isVisible) { + captchaSelector = selector; + captchaVisible = true; + console.log(`检测到滑块验证容器(选择器: ${selector})`); + + // 等待滑块内部元素真正加载完成 + console.log('等待滑块内部元素加载...'); + await page.waitForTimeout(2000); // 给 iframe 更多时间加载 + + // 如果是 iframe,需要在 iframe 内检查元素 + if (selector.includes('iframe')) { + try { + const frame = page.frameLocator(selector); + const keySelectors = ['#slideBg', '.tc-bg-img', '.tc-fg-item']; + let elementsLoaded = false; + + for (let i = 0; i < 8; i++) { // 最多等待4秒 + for (const keySelector of keySelectors) { + try { + const keyElement = frame.locator(keySelector).first(); + await keyElement.isVisible({ timeout: 500 }); + console.log(`✓ iframe 内元素已加载: ${keySelector}`); + elementsLoaded = true; + break; + } catch { + continue; + } + } + + if (elementsLoaded) { + break; + } + + await page.waitForTimeout(500); + } + + if (!elementsLoaded) { + console.warn('警告: 滑块容器已显示,但 iframe 内部元素加载较慢'); + } + } catch (error) { + console.warn('无法检查 iframe 内部元素,继续执行...'); + } + } else { + // 非 iframe 的情况,直接在页面查找 + const keySelectors = ['.tc-bg-img', '.tc-fg-item', '.tc-slider-normal']; + let elementsLoaded = false; + + for (let i = 0; i < 8; i++) { + for (const keySelector of keySelectors) { + try { + const keyElement = page.locator(keySelector).first(); + if (await keyElement.isVisible({ timeout: 300 })) { + console.log(`✓ 滑块关键元素已加载: ${keySelector}`); + elementsLoaded = true; + break; + } + } catch { + continue; + } + } + + if (elementsLoaded) { + break; + } + + await page.waitForTimeout(500); + } + + if (!elementsLoaded) { + console.warn('警告: 滑块容器已显示,但内部元素未完全加载'); + } + } + + break; + } + } catch { + // 继续尝试下一个选择器 + } + } + + if (!captchaVisible) { + // 每隔500ms检查一次 + await page.waitForTimeout(500); + } + } + + if (captchaVisible && captchaSelector) { + if (AUTO_SLIDER) { + console.log('开始自动滑块验证...'); + // 不指定滑块选择器,让 SliderController 自动查找 + const result = await sliderController.solveSlider(page, undefined, captchaSelector); + + if (result.success) { + console.log(`✓ 滑块验证成功!(尝试 ${result.attempts} 次)`); + } else { + console.warn(`✗ 自动滑块验证失败,请手动完成`); + await prompt('请在浏览器中手动完成滑块验证后按 Enter 继续...'); + } + } else { + console.log('请在浏览器中手动完成滑块验证'); + await prompt('完成滑块验证后按 Enter 继续...'); + } + } else { + console.log('未检测到滑块验证或验证已完成'); + } + console.log('请等待短信验证码...'); await prompt('收到短信验证码后按 Enter 继续...'); @@ -174,16 +313,24 @@ async function loginWithSms(page: Page, phone: string): Promise { * 程序主入口:协调上下文、执行登录并持久化 cookies。 */ async function main(): Promise { + console.log('=== 豆瓣登录脚本启动 ==='); + console.log(`环境变量 - DOUBAN_PHONE: ${PHONE ? '已设置' : '未设置'}`); + console.log(`环境变量 - DOUBAN_AUTO_SLIDER: ${AUTO_SLIDER ? '启用' : '禁用'}`); + if (!PHONE) { console.error('请通过环境变量 DOUBAN_PHONE 提供登录手机号。'); process.exitCode = 1; return; } + console.log('正在启动浏览器...'); const browser = await chromium.launch({ headless: false }); + console.log('✓ 浏览器启动成功'); try { + console.log('正在准备浏览器上下文...'); let { context, page, usedCookies } = await prepareContext(browser); + console.log(`✓ 上下文准备完成 (使用缓存: ${usedCookies})`); if (usedCookies) { console.info('✓ 已使用缓存 Cookies 自动登录成功'); diff --git a/src/slider/README.md b/src/slider/README.md new file mode 100644 index 0000000..a83ed0f --- /dev/null +++ b/src/slider/README.md @@ -0,0 +1,294 @@ +# 滑块验证模块 + +本模块实现了豆瓣登录页面滑块验证码的自动检测和解决功能。 + +## 功能特性 + +- ✅ 自动检测滑块验证码中的缺口位置 +- ✅ 支持多滑块检测(检测两个滑块并计算距离) +- ✅ 模拟人类滑动轨迹(贝塞尔曲线) +- ✅ 自动重试机制(最多 10 次) +- ✅ 滑块浮窗消失判定验证成功 + +## 目录结构 + +``` +src/slider/ +├── cli.ts # 命令行工具,用于批量评估/标注 +├── index.ts # 模块导出 +├── types.ts # 类型定义 +├── detector.ts # 主滑块检测器 +├── detector-self-learning.ts # 自学习第二滑块检测 +├── slider-controller.ts # 滑块移动控制器 +├── validator.ts # 检测结果验证工具 +├── detection/ +│ └── candidate-search.ts # 候选区域搜索算法 +└── utils/ + ├── geometry.ts # 几何计算工具 + └── image.ts # 图像处理工具 +``` + +## 运行输出约定 + +- 登录流程截取的**原始验证码**保存在项目根目录的 `noflag/` +- 自动检测产生的**标注结果**保存在根目录的 `output/` +- 可执行 `npm run slider -- --pic-dir=noflag` 对原始截图批量复核,结果同样输出至 `output/` + +## 核心算法 + +### 1. 滑块检测 (`detector.ts`) + +- **多策略候选搜索**:暗区域检测、边缘检测、颜色量化、LAB 色彩空间检测 +- **候选框评分**:基于形状、色调一致性、内部边缘密度、梯度平滑度 +- **边缘精炼**:使用 Sobel 边缘检测和投影分析精确定位滑块边界 + +### 2. 第二滑块检测 (`detector-self-learning.ts`) + +- **模板匹配**:使用第一个检测到的滑块作为模板 +- **边缘模板**:对图像和模板进行 Canny 边缘检测后匹配 +- **位置验证**:确保第二个滑块在同一水平线上(y 轴偏差 < 25px) + +### 3. 滑动控制 (`slider-controller.ts`) + +- **距离计算**(v1.1.0 简化算法): + - **双滑块模式**:`距离 = (缺口X - 滑块X) / scaleX` + - 检测到左侧滑块(b1)和右侧缺口(b2) + - 计算两者左边界的水平距离 + - 除以图像缩放比例(原始 340px → 检测用 800px) + - 原理:类比"两只小鸟嘴尖的水平距离" + - **单滑块模式**:`距离 = 缺口中心X / scaleX` + - 仅检测到缺口位置时的兜底方案 + - 从起始位置直接滑动到缺口中心 +- **图像缩放优化**: + - 原始验证码宽度:340px + - 放大到 800px 进行检测(scaleX ≈ 2.35) + - 提高小尺寸滑块的检测精度 +- **拟人化滑动**: + - 使用 Playwright 的 `steps` 参数 + - 平滑移动轨迹,避免机器人特征 + +## 使用方法 + +### 1. 环境变量配置 + +```bash +# 启用自动滑块验证 +export DOUBAN_AUTO_SLIDER=1 + +# 设置手机号 +export DOUBAN_PHONE=13800138000 + +# 运行登录脚本 +npm run login +``` + +### 2. 编程接口 + +```typescript +import { SliderController } from './slider'; +import { Page } from 'playwright'; + +const controller = new SliderController(10); // 最多尝试 10 次 + +const result = await controller.solveSlider( + page, + '.tcaptcha_drag_button', // 滑块按钮选择器 + '#tcaptcha_iframe' // 验证码容器选择器 +); + +if (result.success) { + console.log(`验证成功!尝试 ${result.attempts} 次`); +} else { + console.log('验证失败'); +} +``` + +### 3. 独立使用滑块检测器 + +```typescript +import { SliderDetector } from './slider'; + +const detector = new SliderDetector(); +const boxes = await detector.detectSlider( + 'captcha.png', + 'output/captcha-annotated.png', + true +); + +if (boxes && boxes.length > 0) { + console.log('检测到滑块:', boxes); +} +``` + +### 4. CLI 工具 + +```bash +npm run slider -- --pic-dir=images/douban +``` + +- 默认读取 `images/douban` 下的验证码图片并输出标注结果到 `images/output` +- 若存在 `ground-truth.json`,会自动评估检测精度和召回率 +- 通过 `--pic-dir=子目录` 可切换其他图片集合 + +## 工作流程 + +1. **等待滑块出现**:检测页面中是否存在滑块验证码 iframe +2. **截图**:捕获验证码区域图像,保存原始图到 `noflag/` 目录 +3. **图像预处理**:将图像缩放到 800px 宽度以提高检测精度 +4. **多策略检测**:并行运行四种算法检测滑块候选框 + - 暗区域检测(基于亮度阈值) + - Canny 边缘检测 + - 颜色量化(K-means 聚类) + - LAB 色彩空间分析 +5. **候选框评分与筛选**: + - 计算每个候选框的综合分数(形状、颜色、边缘) + - IoU 去重,合并重叠候选框 + - 选择得分最高的两个滑块 +6. **距离计算**: + - 双滑块:`(b2.x - b1.x) / scaleX` + - 单滑块:`b.x / scaleX` +7. **可视化标注**:在检测图上绘制红色框,保存到 `output/` 目录 +8. **模拟滑动**:拖动左侧滑块到计算出的距离 +9. **验证结果**:检查是否出现 `.tc-success` 成功标识 +10. **失败重试**:点击刷新按钮,重新截图检测(最多 10 次) + +## 参数说明 + +### SliderController 构造函数 + +```typescript +new SliderController(maxAttempts: number = 10) +``` + +- `maxAttempts`: 最大尝试次数,默认 10 次 + +### solveSlider 方法 + +```typescript +async solveSlider( + page: Page, + sliderSelector: string = '.tcaptcha_drag_button', + captchaSelector: string = '#tcaptcha_iframe' +): Promise +``` + +- `page`: Playwright 页面对象 +- `sliderSelector`: 滑块按钮的 CSS 选择器 +- `captchaSelector`: 验证码容器的 CSS 选择器 + +### 返回值 SliderSolveResult + +```typescript +interface SliderSolveResult { + success: boolean; // 是否成功 + attempts: number; // 尝试次数 + distance?: number; // 滑动距离(像素) +} +``` + +## 依赖项 + +- `sharp`: 图像处理库,用于边缘检测、颜色量化等 +- `playwright`: 浏览器自动化,用于截图和鼠标操作 + +## 注意事项 + +1. **选择器适配**:不同网站的滑块选择器可能不同,需要根据实际情况调整 +2. **截图位置**:临时截图保存在 `os.tmpdir()/douban-slider/` 目录 +3. **成功判定**:通过检查验证码浮窗是否消失来判断验证是否成功 +4. **失败处理**:自动验证失败后会提示用户手动完成 + +## 调试 + +如需查看检测过程中的日志,观察控制台输出: + +``` +[SliderController] 开始滑块验证,最多尝试 10 次 +[SliderController] 等待验证码 iframe 加载... +[SliderController] 验证码 iframe 已加载 +[SliderController] 等待滑块背景图加载... +[SliderController] 滑块背景图已加载 +[SliderController] ===== 第 1/10 次尝试 ===== +[SliderController] 已截图到: /Users/gavin/douban-login/noflag/captcha-20250125-123456.png +[SliderDetector] 图像已缩放: 340x191 -> 800x449 (scaleX=2.35) +[SliderDetector] 检测到 2 个滑块候选框 +[SliderDetector] 滑块 1: x=45, width=60, score=0.85 +[SliderDetector] 滑块 2: x=195, width=55, score=0.82 +[SliderDetector] 已保存标注图: /Users/gavin/douban-login/output/captcha-20250125-123456-detected.png +[SliderController] ✓ 检测到 2 个滑块 +[SliderController] 计算距离: (195 - 45) / 2.35 = 63.8px +[SliderController] 开始拖动滑块 64px +[SliderController] ✓ 滑块验证成功!(1000ms后窗口消失) +[SliderController] 验证成功!共尝试 1 次 +``` + +**关键日志说明**: +- `图像已缩放`: 显示原始尺寸、检测尺寸和缩放比例 +- `检测到 N 个滑块候选框`: N=2 表示双滑块模式,N=1 表示单滑块模式 +- `滑块 1/2`: 显示每个滑块的 x 坐标、宽度和评分 +- `已保存标注图`: 红框标注结果的保存路径 +- `计算距离`: 显示详细的距离计算公式 +- `✓ 滑块验证成功`: 检测到腾讯验证码的成功标识 + +## 故障排查 + +### 1. 检测不到滑块 + +**症状**:日志显示"未检测到滑块" + +**排查步骤**: +- 检查 `noflag/` 目录下的原始截图是否正确 +- 确认验证码已完全加载(等待 iframe 和图片元素) +- 查看 `output/` 目录的标注图,确认候选框是否被正确识别 +- 调整 `candidate-search.ts` 中的检测阈值 + +### 2. 滑动距离不准确 + +**症状**:滑块滑过头或不够远 + +**排查步骤**: +- 查看日志中的 `scaleX` 值(应该约为 2.35) +- 确认使用的是双滑块模式还是单滑块模式 +- 检查 `output/` 目录标注图,红框是否准确框住滑块 +- 验证距离计算公式:`(b2.x - b1.x) / scaleX` + +**v1.1.0 改进**: +- 简化了距离计算逻辑,移除复杂的坐标转换 +- 采用"两只小鸟距离"原理,直接计算左边界差值 + +### 3. 验证总是失败 + +**症状**:滑动后没有出现成功提示 + +**可能原因**: +- 滑动距离计算错误(参见上一条) +- 触发反爬虫检测(轨迹太机械) +- 网络延迟导致成功标识未及时显示 + +**解决方案**: +- 检查日志中的滑动距离是否合理(通常 50-150px) +- 增加成功判定的等待时间(当前 1000ms) +- 尝试多次重试(当前最多 10 次) +- 查看浏览器开发者工具,确认 `.tc-success` 类名是否出现 + +### 4. 视觉调试技巧 + +**查看检测结果**: +1. 运行登录后,打开 `output/` 目录 +2. 找到最新的 `*-detected.png` 文件 +3. 检查红框是否准确标注了滑块和缺口 +4. 对比 `noflag/` 目录的原始图,确认缩放和标注的准确性 + +**理想的标注结果**: +- 左侧滑块:红框紧贴滑块边缘 +- 右侧缺口:红框框住缺口区域 +- 两个红框高度基本一致(y 坐标偏差 < 25px) + +## 移植说明 + +本模块从 `captcha_cracker` 项目移植而来,并进行了以下扩展: + +1. 原样保留检测、标注、CLI 与验证器等核心能力 +2. 新增 Playwright 集成,用于自动截图和滑块拖动 +3. 添加登录流程的滑块控制器与重试机制 +4. 调整脚本入口与文档,便于在豆瓣登录场景复用 diff --git a/src/slider/cli.ts b/src/slider/cli.ts new file mode 100644 index 0000000..8051f30 --- /dev/null +++ b/src/slider/cli.ts @@ -0,0 +1,221 @@ +import fs from 'fs'; +import path from 'path'; +import { SliderDetector } from './detector'; +import { SliderValidator } from './validator'; +import { BoundingBox, Rectangle } from './types'; + +type GroundTruth = Record; + +async function main() { + const detector = new SliderDetector(); + const validator = new SliderValidator(); + const baseDir = path.join(__dirname, '..', '..'); + const doubanDir = path.join(baseDir, 'images', 'douban'); + const outputDir = path.join(baseDir, 'output'); + const groundTruthPath = path.join(baseDir, 'ground-truth.json'); + const detectionCache = new Map(); + + const detect = async (imagePath: string): Promise => { + if (!detectionCache.has(imagePath)) { + const result = await detector.detectSlider(imagePath, undefined, true); + detectionCache.set(imagePath, result); + } + return detectionCache.get(imagePath)!; + }; + + console.log('=== 滑块检测 CLI ===\n'); + + const customArg = process.argv.find((arg) => arg.startsWith('--pic-dir=')); + const processDir = customArg + ? path.join(baseDir, customArg.split('=')[1]) + : doubanDir; + const processDirName = customArg ? customArg.split('=')[1] : 'images/douban'; + const useDefaultDataset = !customArg || processDir === doubanDir; + + if (useDefaultDataset) { + const groundTruth = loadGroundTruth(groundTruthPath); + if (groundTruth) { + await evaluateAgainstGroundTruth({ + doubanDir, + groundTruth, + detect, + validator, + }); + } else { + console.log('未找到 ground-truth.json,跳过准确性验证。\n'); + } + } else { + console.log(`使用自定义图片目录 ${processDirName},跳过 ground-truth 验证。\n`); + } + + await ensureDir(outputDir); + await processDirectory({ + processDir, + processDirName, + outputDir, + detect, + detector, + }); + + console.log('\n=== 检测完成 ==='); +} + +function loadGroundTruth(filePath: string): GroundTruth | null { + if (!fs.existsSync(filePath)) { + return null; + } + try { + const content = fs.readFileSync(filePath, 'utf-8'); + return JSON.parse(content) as GroundTruth; + } catch (error) { + console.warn(`无法解析 ground-truth.json:${error}`); + return null; + } +} + +async function ensureDir(dir: string): Promise { + await fs.promises.mkdir(dir, { recursive: true }); +} + +async function evaluateAgainstGroundTruth({ + doubanDir, + groundTruth, + detect, + validator, +}: { + doubanDir: string; + groundTruth: GroundTruth; + detect: (imagePath: string) => Promise; + validator: SliderValidator; +}): Promise { + console.log('1. 验证算法准确性(容差:10px)...\n'); + + let totalMatched = 0; + let totalTargets = 0; + let totalDetected = 0; + + for (const [fileName, expectedBoxes] of Object.entries(groundTruth)) { + const imagePath = path.join(doubanDir, fileName); + if (!fs.existsSync(imagePath)) { + console.log(` 跳过 ${fileName}(原图不存在)`); + continue; + } + + const detections = await detect(imagePath); + const detectedBoxes = Array.isArray(detections) ? detections : []; + + const result = await validator.validateDetection( + detectedBoxes, + expectedBoxes, + 10 + ); + + console.log(` ${fileName}:`); + console.log( + ` 目标 ${result.totalTargets} 个 | 检测 ${result.detectedCount} 个 | 匹配 ${result.matchedCount} 个` + ); + console.log( + ` 准确率: ${(result.precision * 100).toFixed(1)}% | 召回率: ${(result.recall * 100).toFixed(1)}%` + ); + + if (result.matches.length > 0) { + result.matches.forEach((match, index) => { + console.log( + ` 匹配 ${index + 1}: IoU=${match.iou.toFixed(3)}` + ); + }); + } + + const missed = result.totalTargets - result.matchedCount; + if (missed > 0) { + console.log(` ⚠️ 漏检 ${missed} 个滑块`); + } + + if (result.unmatched.length > 0) { + console.log(` ⚠️ 误检 ${result.unmatched.length} 个滑块`); + } + + console.log(''); + + totalMatched += result.matchedCount; + totalTargets += result.totalTargets; + totalDetected += result.detectedCount; + } + + if (totalTargets > 0) { + const overallPrecision = + totalDetected > 0 ? (totalMatched / totalDetected) * 100 : 0; + const overallRecall = (totalMatched / totalTargets) * 100; + + console.log('总体统计:'); + console.log(` 总目标数: ${totalTargets}`); + console.log(` 总检测数: ${totalDetected}`); + console.log(` 匹配成功: ${totalMatched}`); + console.log( + ` 总体准确率: ${overallPrecision.toFixed(1)}% | 总体召回率: ${overallRecall.toFixed(1)}%\n` + ); + } else { + console.log(' ground-truth.json 中没有记录可供验证。\n'); + } +} + +async function processDirectory({ + processDir, + processDirName, + outputDir, + detect, + detector, +}: { + processDir: string; + processDirName: string; + outputDir: string; + detect: (imagePath: string) => Promise; + detector: SliderDetector; +}): Promise { + console.log(`2. 处理 ${processDirName} 目录下的滑块图片...\n`); + + if (!fs.existsSync(processDir)) { + console.log(` 错误:找不到目录 ${processDir}`); + return; + } + + const files = fs + .readdirSync(processDir) + .filter((file) => file.toLowerCase().endsWith('.png')); + + if (files.length === 0) { + console.log(' 没有找到需要处理的图片。'); + return; + } + + let processed = 0; + for (const file of files) { + const inputPath = path.join(processDir, file); + const outputPath = path.join(outputDir, file); + + const detections = await detect(inputPath); + + if (detections && detections.length > 0) { + await detector.annotate(inputPath, detections, outputPath); + const boxSummary = detections + .map( + (box, index) => + `#${index + 1}[x=${box.x}, y=${box.y}, w=${box.width}, h=${box.height}]` + ) + .join(', '); + console.log(` ✅ ${file}: 检测到 ${detections.length} 个滑块 ${boxSummary}`); + processed++; + } else { + console.log(` ❌ ${file}: 未检测到滑块`); + } + } + + console.log( + `\n 处理完成: ${processed}/${files.length} 张图片,结果输出到 ${outputDir}` + ); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/src/slider/detection/candidate-search.ts b/src/slider/detection/candidate-search.ts new file mode 100644 index 0000000..5e577af --- /dev/null +++ b/src/slider/detection/candidate-search.ts @@ -0,0 +1,705 @@ +import sharp from 'sharp'; +import { BoundingBox, RawImage } from '../types'; +import { calculateIoU } from '../utils/geometry'; +import { createEdgeMap, morphologyClose, dilate, toGrayscale } from '../utils/image'; + +interface CandidateSearchInput { + original: RawImage; + normalized: RawImage; + quantizationSource: sharp.Sharp; +} + +export async function findCandidateBoxes({ + original, + normalized, + quantizationSource, +}: CandidateSearchInput): Promise { + const { width, height, channels } = normalized; + + const mixedBoxes = detectDarkRegions(normalized.data, width, height, channels); + const edgeBoxes = detectByEdges(normalized.data, width, height, channels); + const quantizedBoxes = await detectByColorQuantization( + quantizationSource, + width, + height, + channels + ); + const labBoxes = detectByLabColor(original.data, width, height, channels); + + const allBoxes = [...mixedBoxes, ...edgeBoxes, ...quantizedBoxes, ...labBoxes]; + const uniqueBoxes: BoundingBox[] = []; + allBoxes + .sort( + (a, b) => + b.score / (b.width * b.height) - a.score / (a.width * a.height) + ) + .forEach((box) => { + if (!uniqueBoxes.some((ub) => calculateIoU(ub, box) > 0.5)) { + uniqueBoxes.push(box); + } + }); + + const edgeMap = createEdgeMap(original); + + const scoredBoxes = uniqueBoxes + .map((box) => + scoreCandidate(box, original, normalized, edgeMap) + ) + .filter((box) => { + const aspectRatio = box.width / box.height; + const marginX = width * 0.05; + const marginY = height * 0.05; + + const isNotOnEdge = + box.x > marginX && + box.y > marginY && + box.x + box.width < width - marginX && + box.y + box.height < height - marginY; + + return ( + box.width >= 60 && + box.width <= 120 && + box.height >= 60 && + box.height <= 120 && + aspectRatio >= 0.7 && + aspectRatio <= 1.3 && + isNotOnEdge + ); + }) + .sort((a, b) => b.score - a.score); + + return scoredBoxes; +} + +function scoreCandidate( + box: BoundingBox, + original: RawImage, + normalized: RawImage, + edgeMap: Uint8Array +): BoundingBox { + const aspectRatio = box.width / box.height; + const isSquare = aspectRatio >= 0.85 && aspectRatio <= 1.18; + const isConsistent = verifyHueConsistency(original, box); + const internalEdgeDensity = calculateInternalEdgeDensity( + edgeMap, + normalized.width, + box + ); + const gradientScore = calculateEdgeGradientScore(original, box); + + let score = box.score / (box.width * box.height); + if (isSquare) score += 0.5; + if (isConsistent) score += 0.8; + if (internalEdgeDensity < 0.15) score += 0.8; + if (internalEdgeDensity < 0.1) score += 0.6; + score += gradientScore * 2.0; + + return { ...box, score }; +} + +function verifyHueConsistency(image: RawImage, box: BoundingBox): boolean { + const hueValues: number[] = []; + const saturationValues: number[] = []; + + const inset = 5; + const startY = box.y + inset; + const endY = box.y + box.height - inset; + const startX = box.x + inset; + const endX = box.x + box.width - inset; + + if (endY <= startY || endX <= startX) return true; + + const { data, width, channels } = image; + + for (let y = startY; y < endY; y++) { + for (let x = startX; x < endX; x++) { + const idx = (y * width + x) * channels; + const r = data[idx] / 255; + const g = data[idx + 1] / 255; + const b = data[idx + 2] / 255; + + const max = Math.max(r, g, b); + const min = Math.min(r, g, b); + let h = 0; + let s = 0; + const l = (max + min) / 2; + + if (max !== min) { + const d = max - min; + s = l > 0.5 ? d / (2 - max - min) : d / (max + min); + switch (max) { + case r: + h = (g - b) / d + (g < b ? 6 : 0); + break; + case g: + h = (b - r) / d + 2; + break; + case b: + h = (r - g) / d + 4; + break; + } + h /= 6; + } + + if (s > 0.15 && l > 0.1 && l < 0.9) { + hueValues.push(h * 360); + saturationValues.push(s); + } + } + } + + const coloredPixels = hueValues.length; + const internalArea = (box.width - 2 * inset) * (box.height - 2 * inset); + + if (coloredPixels < internalArea * 0.2) { + return true; + } + + const normalizeHue = (h: number) => (h > 180 ? h - 360 : h); + const normalizedHues = hueValues.map(normalizeHue); + const meanHue = + normalizedHues.reduce((a, b) => a + b, 0) / normalizedHues.length; + const stdDevHue = Math.sqrt( + normalizedHues + .map((h) => Math.pow(h - meanHue, 2)) + .reduce((a, b) => a + b, 0) / normalizedHues.length + ); + + return stdDevHue < 25; +} + +function calculateInternalEdgeDensity( + edgeMap: Uint8Array, + width: number, + box: BoundingBox +): number { + let edgePixels = 0; + const shrink = 5; + + const startX = box.x + shrink; + const startY = box.y + shrink; + const endX = box.x + box.width - shrink; + const endY = box.y + box.height - shrink; + + if (endX <= startX || endY <= startY) return 0; + + for (let y = startY; y < endY; y++) { + for (let x = startX; x < endX; x++) { + if (edgeMap[y * width + x] === 1) { + edgePixels++; + } + } + } + + const area = (endX - startX) * (endY - startY); + return area === 0 ? 0 : edgePixels / area; +} + +function calculateEdgeGradientScore(image: RawImage, box: BoundingBox): number { + const gradients: number[] = []; + const band = 5; + const { data, width, height, channels } = image; + + const sampleLine = ( + x1: number, + y1: number, + x2: number, + y2: number + ) => { + const dx = x2 - x1; + const dy = y2 - y1; + const steps = Math.max(Math.abs(dx), Math.abs(dy)); + if (steps === 0) return; + + let lastBrightness = -1; + + for (let i = 0; i <= steps; i++) { + const x = Math.round(x1 + (dx * i) / steps); + const y = Math.round(y1 + (dy * i) / steps); + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = (y * width + x) * channels; + const brightness = + data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114; + + if (lastBrightness !== -1) { + gradients.push(Math.abs(brightness - lastBrightness)); + } + lastBrightness = brightness; + } + }; + + sampleLine(box.x, box.y - band, box.x + box.width, box.y - band); + sampleLine( + box.x, + box.y + box.height + band, + box.x + box.width, + box.y + box.height + band + ); + sampleLine(box.x - band, box.y, box.x - band, box.y + box.height); + sampleLine( + box.x + box.width + band, + box.y, + box.x + box.width + band, + box.y + box.height + ); + + if (gradients.length < 20) { + return 0.5; + } + + const mean = gradients.reduce((a, b) => a + b, 0) / gradients.length; + const variance = + gradients.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / + gradients.length; + + return Math.exp(-variance / 100); +} + +function detectDarkRegions( + data: Buffer, + width: number, + height: number, + channels: number +): BoundingBox[] { + const allCandidates: BoundingBox[] = []; + + for (const brightThreshold of [130, 160, 190, 220]) { + const whiteMap = new Uint8Array(width * height); + for (let i = 0; i < data.length; i += channels) { + const brightness = + data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114; + whiteMap[i / channels] = brightness > brightThreshold ? 1 : 0; + } + const dilatedMap = dilate(whiteMap, width, height, 5); + const regions = findDarkRegionsList(dilatedMap, width, height); + allCandidates.push( + ...selectBestRegions(regions, width, height, true) + ); + } + + for (const darkThreshold of [40, 60, 80, 100, 120]) { + const darkMap = new Uint8Array(width * height); + for (let i = 0; i < data.length; i += channels) { + const brightness = + data[i] * 0.299 + data[i + 1] * 0.587 + data[i + 2] * 0.114; + darkMap[i / channels] = brightness < darkThreshold ? 1 : 0; + } + const cleaned = morphologyClose(darkMap, width, height, 3); + const regions = findDarkRegionsList(cleaned, width, height); + allCandidates.push( + ...selectBestRegions(regions, width, height, true) + ); + } + + if (allCandidates.length === 0) return []; + + const uniqueCandidates: BoundingBox[] = []; + allCandidates.sort((a, b) => b.score - a.score).forEach((candidate) => { + if (!uniqueCandidates.some((s) => calculateIoU(s, candidate) > 0.4)) { + uniqueCandidates.push(candidate); + } + }); + + return uniqueCandidates; +} + +function findDarkRegionsList( + binary: Uint8Array, + width: number, + height: number +): BoundingBox[] { + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0 && binary[idx] === 1) { + const region = floodFill(binary, visited, x, y, width, height); + if (region.width >= 20 && region.height >= 20) { + regions.push(region); + } + } + } + } + + return regions; +} + +function selectBestRegions( + regions: BoundingBox[], + imageWidth: number, + imageHeight: number, + selectMultiple: boolean = false +): BoundingBox[] { + if (regions.length === 0) return []; + + const validRegions = regions.filter( + (region) => + region.width < imageWidth * 0.5 && region.height < imageHeight * 0.5 + ); + + const candidates = validRegions.filter((region) => { + const aspectRatio = region.width / region.height; + const centerY = region.y + region.height / 2; + const sizeDiff = Math.abs(region.width - region.height); + + return ( + region.width >= 70 && + region.width <= 110 && + region.height >= 70 && + region.height <= 110 && + aspectRatio >= 0.85 && + aspectRatio <= 1.18 && + sizeDiff <= 20 && + centerY > imageHeight * 0.1 && + centerY < imageHeight * 0.8 + ); + }); + + if (candidates.length === 0) return []; + + candidates.sort((a, b) => { + const densityA = a.score / (a.width * a.height); + const densityB = b.score / (b.width * b.height); + const aspectScoreA = Math.abs(a.width / a.height - 1); + const aspectScoreB = Math.abs(b.width / b.height - 1); + return densityB * 3 - aspectScoreB - (densityA * 3 - aspectScoreA); + }); + + const selected: BoundingBox[] = []; + for (const candidate of candidates) { + const overlaps = selected.some( + (s) => calculateIoU(s, candidate) > 0.3 + ); + if (!overlaps) { + selected.push(candidate); + if (!selectMultiple && selected.length >= 1) break; + if (selectMultiple && selected.length >= 3) break; + } + } + + return selected; +} + +function detectByEdges( + data: Buffer, + width: number, + height: number, + channels: number +): BoundingBox[] { + const gray = toGrayscale(data, width, height, channels); + const edges = new Uint8Array(width * height); + + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const idx = y * width + x; + const gx = + -gray[(y - 1) * width + (x - 1)] + + gray[(y - 1) * width + (x + 1)] - + 2 * gray[idx - 1] + + 2 * gray[idx + 1] - + gray[(y + 1) * width + (x - 1)] + + gray[(y + 1) * width + (x + 1)]; + + const gy = + -gray[(y - 1) * width + (x - 1)] - + 2 * gray[(y - 1) * width + x] - + gray[(y - 1) * width + (x + 1)] + + gray[(y + 1) * width + (x - 1)] + + 2 * gray[(y + 1) * width + x] + + gray[(y + 1) * width + (x + 1)]; + + const magnitude = Math.sqrt(gx * gx + gy * gy); + edges[idx] = magnitude > 40 ? 1 : 0; + } + } + + const dilatedMap = dilate(edges, width, height, 4); + const regions = findDarkRegionsList(dilatedMap, width, height); + return selectBestRegions(regions, width, height, true); +} + +async function detectByColorQuantization( + image: sharp.Sharp, + width: number, + height: number, + channels: number +): Promise { + try { + const smoothed = await image + .clone() + .median(3) + .ensureAlpha() + .raw() + .toBuffer({ resolveWithObject: true }); + + const { data: smoothData, info } = smoothed; + const channelCount = info.channels ?? channels; + const quantized = Buffer.from(smoothData); + + const palette = [ + [240, 240, 240], + [200, 200, 200], + [150, 150, 150], + [100, 100, 100], + [60, 60, 60], + [30, 30, 30], + [0, 0, 0], + ]; + + for (let i = 0; i < quantized.length; i += channelCount) { + const r = quantized[i]; + const g = quantized[i + 1]; + const b = quantized[i + 2]; + let minDist = Infinity; + let closest = 0; + + for (let p = 0; p < palette.length; p++) { + const [pr, pg, pb] = palette[p]; + const dist = Math.pow(r - pr, 2) + Math.pow(g - pg, 2) + Math.pow(b - pb, 2); + if (dist < minDist) { + minDist = dist; + closest = p; + } + } + + const [qr, qg, qb] = palette[closest]; + quantized[i] = qr; + quantized[i + 1] = qg; + quantized[i + 2] = qb; + } + + const visited = new Uint8Array(width * height); + const regions: BoundingBox[] = []; + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const idx = y * width + x; + if (visited[idx] === 0) { + const region = floodFillOnQuantized( + quantized, + visited, + x, + y, + width, + height, + channelCount + ); + + if ( + region.width >= 40 && + region.width <= 140 && + region.height >= 40 && + region.height <= 140 + ) { + const aspectRatio = region.width / region.height; + if (aspectRatio >= 0.7 && aspectRatio <= 1.4) { + regions.push(region); + } + } + } + } + } + + return selectBestRegions(regions, width, height, true); + } catch (error) { + console.error('[Quantization] Failed to quantize image:', error); + return []; + } +} + +function detectByLabColor( + data: Buffer, + width: number, + height: number, + channels: number +): BoundingBox[] { + const labMap = new Float32Array(width * height * 3); + for (let i = 0; i < width * height; i++) { + const idx = i * channels; + const [l, a, b] = rgbToLab(data[idx], data[idx + 1], data[idx + 2]); + labMap[i * 3] = l; + labMap[i * 3 + 1] = a; + labMap[i * 3 + 2] = b; + } + + const diffMap = new Uint8Array(width * height); + const neighborhood = 8; + for (let y = neighborhood; y < height - neighborhood; y++) { + for (let x = neighborhood; x < width - neighborhood; x++) { + const centerIdx = y * width + x; + let maxDiff = 0; + for (let ny = -neighborhood; ny <= neighborhood; ny += neighborhood) { + for (let nx = -neighborhood; nx <= neighborhood; nx += neighborhood) { + if (nx === 0 && ny === 0) continue; + const neighborIdx = (y + ny) * width + (x + nx); + const deltaE = Math.sqrt( + Math.pow(labMap[centerIdx * 3] - labMap[neighborIdx * 3], 2) + + Math.pow(labMap[centerIdx * 3 + 1] - labMap[neighborIdx * 3 + 1], 2) + + Math.pow(labMap[centerIdx * 3 + 2] - labMap[neighborIdx * 3 + 2], 2) + ); + if (deltaE > maxDiff) { + maxDiff = deltaE; + } + } + } + if (maxDiff > 12) { + diffMap[centerIdx] = 1; + } + } + } + + const cleaned = morphologyClose(diffMap, width, height, 5); + const regions = findDarkRegionsList(cleaned, width, height); + return selectBestRegions(regions, width, height, true); +} + +function rgbToLab(r: number, g: number, b: number): [number, number, number] { + let R = r / 255; + let G = g / 255; + let B = b / 255; + R = R > 0.04045 ? Math.pow((R + 0.055) / 1.055, 2.4) : R / 12.92; + G = G > 0.04045 ? Math.pow((G + 0.055) / 1.055, 2.4) : G / 12.92; + B = B > 0.04045 ? Math.pow((B + 0.055) / 1.055, 2.4) : B / 12.92; + + const X = R * 0.4124 + G * 0.3576 + B * 0.1805; + const Y = R * 0.2126 + G * 0.7152 + B * 0.0722; + const Z = R * 0.0193 + G * 0.1192 + B * 0.9505; + + let x = X / 0.95047; + let y = Y / 1.0; + let z = Z / 1.08883; + + x = x > 0.008856 ? Math.pow(x, 1 / 3) : 7.787 * x + 16 / 116; + y = y > 0.008856 ? Math.pow(y, 1 / 3) : 7.787 * y + 16 / 116; + z = z > 0.008856 ? Math.pow(z, 1 / 3) : 7.787 * z + 16 / 116; + + const L = 116 * y - 16; + const a = 500 * (x - y); + const bLab = 200 * (y - z); + + return [L, a, bLab]; +} + +function floodFillOnQuantized( + data: Buffer, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number, + channels: number +): BoundingBox { + const startIdx = (startY * width + startX) * channels; + const targetColor = [ + data[startIdx], + data[startIdx + 1], + data[startIdx + 2], + ]; + + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + let pixelCount = 0; + const stack: Array<[number, number]> = [[startX, startY]]; + + visited[startY * width + startX] = 1; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + pixelCount++; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + const neighbors: Array<[number, number]> = [ + [x + 1, y], + [x - 1, y], + [x, y + 1], + [x, y - 1], + ]; + + for (const [nx, ny] of neighbors) { + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + const nIdx = ny * width + nx; + if (visited[nIdx] === 0) { + const baseIdx = nIdx * channels; + const neighborColor = [ + data[baseIdx], + data[baseIdx + 1], + data[baseIdx + 2], + ]; + if ( + neighborColor[0] === targetColor[0] && + neighborColor[1] === targetColor[1] && + neighborColor[2] === targetColor[2] + ) { + visited[nIdx] = 1; + stack.push([nx, ny]); + } + } + } + } + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: pixelCount, + }; +} + +function floodFill( + binary: Uint8Array, + visited: Uint8Array, + startX: number, + startY: number, + width: number, + height: number +): BoundingBox { + let minX = startX; + let minY = startY; + let maxX = startX; + let maxY = startY; + let pixelCount = 0; + + const stack: Array<[number, number]> = [[startX, startY]]; + + while (stack.length > 0) { + const [x, y] = stack.pop()!; + + if (x < 0 || x >= width || y < 0 || y >= height) continue; + + const idx = y * width + x; + if (visited[idx] === 1 || binary[idx] === 0) continue; + + visited[idx] = 1; + pixelCount++; + + minX = Math.min(minX, x); + minY = Math.min(minY, y); + maxX = Math.max(maxX, x); + maxY = Math.max(maxY, y); + + stack.push([x + 1, y]); + stack.push([x - 1, y]); + stack.push([x, y + 1]); + stack.push([x, y - 1]); + } + + return { + x: minX, + y: minY, + width: maxX - minX + 1, + height: maxY - minY + 1, + score: pixelCount, + }; +} diff --git a/src/slider/detector-self-learning.ts b/src/slider/detector-self-learning.ts new file mode 100644 index 0000000..a1c9e1c --- /dev/null +++ b/src/slider/detector-self-learning.ts @@ -0,0 +1,152 @@ +import sharp from 'sharp'; +import * as fs from 'fs'; +import * as path from 'path'; +import { BoundingBox } from './types'; + +async function matchTemplate( + image: sharp.Sharp, + template: sharp.Sharp, + searchArea: { x: number; y: number; width: number; height: number }, + excludeBox?: BoundingBox +): Promise<{ maxVal: number; maxLoc: { x: number; y: number } }> { + const { data: imageBuffer, info: imageInfo } = await image + .raw() + .toBuffer({ resolveWithObject: true }); + const { data: templateBuffer, info: templateInfo } = await template + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width: imageWidth, height: imageHeight, channels: imageChannels } = imageInfo; + const { width: templateWidth, height: templateHeight, channels: templateChannels } = templateInfo; + + if (!imageWidth || !imageHeight || !templateWidth || !templateHeight) { + throw new Error('Image or template dimensions are invalid.'); + } + + let maxVal = -Infinity; + let maxLoc = { x: 0, y: 0 }; + + const startY = Math.max(0, searchArea.y); + const endY = Math.min(imageHeight - templateHeight, searchArea.y + searchArea.height); + const startX = Math.max(0, searchArea.x); + const endX = Math.min(imageWidth - templateWidth, searchArea.x + searchArea.width); + + for (let y = startY; y < endY; y++) { + for (let x = startX; x < endX; x++) { + // Exclude the original box area from matching by checking for significant overlap + if (excludeBox) { + const x_overlap = Math.max(0, Math.min(x + templateWidth, excludeBox.x + excludeBox.width) - Math.max(x, excludeBox.x)); + const y_overlap = Math.max(0, Math.min(y + templateHeight, excludeBox.y + excludeBox.height) - Math.max(y, excludeBox.y)); + const overlapArea = x_overlap * y_overlap; + if (overlapArea / (templateWidth * templateHeight) > 0.5) { + continue; + } + } + + let sumC = 0, sumT2 = 0, sumI2 = 0; + + for (let ty = 0; ty < templateHeight; ty++) { + for (let tx = 0; tx < templateWidth; tx++) { + const imageY = y + ty; + const imageX = x + tx; + + const imageIdx = (imageY * imageWidth + imageX) * imageChannels; + const templateIdx = (ty * templateWidth + tx) * templateChannels; + + const imageVal = imageBuffer[imageIdx]; + const templateVal = templateBuffer[templateIdx]; + + sumC += imageVal * templateVal; + sumT2 += templateVal * templateVal; + sumI2 += imageVal * imageVal; + } + } + + const denominator = Math.sqrt(sumT2 * sumI2); + const val = denominator === 0 ? 0 : sumC / denominator; + + if (val > maxVal) { + maxVal = val; + maxLoc = { x, y }; + } + } + } + + return { maxVal, maxLoc }; +} + +export class SelfLearningSliderDetector { + private async cannyEdge(image: sharp.Sharp): Promise { + return image + .grayscale() + .raw() + .toBuffer({ resolveWithObject: true }) + .then(({ data, info }) => { + const sobelData = Buffer.alloc(info.width * info.height); + for (let y = 1; y < info.height - 1; y++) { + for (let x = 1; x < info.width - 1; x++) { + const Gx = -data[(y - 1) * info.width + x - 1] - 2 * data[y * info.width + x - 1] - data[(y + 1) * info.width + x - 1] + data[(y - 1) * info.width + x + 1] + 2 * data[y * info.width + x + 1] + data[(y + 1) * info.width + x + 1]; + const Gy = -data[(y - 1) * info.width + x - 1] - 2 * data[(y - 1) * info.width + x] - data[(y - 1) * info.width + x + 1] + data[(y + 1) * info.width + x - 1] + 2 * data[(y + 1) * info.width + x] + data[(y + 1) * info.width + x + 1]; + const magnitude = Math.sqrt(Gx * Gx + Gy * Gy); + sobelData[y * info.width + x] = magnitude > 50 ? 255 : 0; + } + } + return sharp(sobelData, { raw: { width: info.width, height: info.height, channels: 1 } }); + }); + } + + public async detectSecondSlider(imagePath: string, seedBox: BoundingBox): Promise { + try { + const image = sharp(imagePath); + const { width: imageWidth, height: imageHeight } = await image.metadata(); + + if (!imageWidth || !imageHeight) return null; + + const template = image.clone().extract({ + left: seedBox.x, + top: seedBox.y, + width: seedBox.width, + height: seedBox.height, + }); + + const debugDir = path.join(__dirname, '..', '..', 'images', 'debug'); + if (!fs.existsSync(debugDir)) fs.mkdirSync(debugDir, { recursive: true }); + const templateFileName = `template-${path.basename(imagePath)}`; + await template.toFile(path.join(debugDir, templateFileName)); + console.log(` [SelfLearning] Saved refined template to: ${templateFileName}`); + + const imageEdge = await this.cannyEdge(image); + const templateEdge = await this.cannyEdge(template); + + const searchArea = { + x: 0, + y: Math.max(0, seedBox.y - 25), // 显著放宽垂直搜索范围 + width: imageWidth, + height: seedBox.height + 50, // 显著放宽垂直搜索范围 + }; + + const { maxVal, maxLoc } = await matchTemplate(imageEdge, templateEdge, searchArea, seedBox); + console.log(` [SelfLearning] Max score for ${path.basename(imagePath)}: ${maxVal.toFixed(4)} at y=${maxLoc.y}`); + + // 验证第二个滑块是否在同一水平线上,放宽y轴偏差到25px + if (Math.abs(maxLoc.y - seedBox.y) > 25) { + console.log(` [SelfLearning] Discarded second slider candidate because it's not on the same horizontal line (y-delta: ${Math.abs(maxLoc.y - seedBox.y)}px).`); + return null; + } + + if (maxVal > 0.35) { // 使用一个相对宽松但合理的阈值 + return { + x: maxLoc.x, + y: maxLoc.y, + width: seedBox.width, + height: seedBox.height, + score: maxVal, + }; + } + return null; + } catch (error) { + console.error(`Error during self-learning detection for ${imagePath}:`, error); + return null; + } + } +} diff --git a/src/slider/detector.ts b/src/slider/detector.ts new file mode 100644 index 0000000..131b866 --- /dev/null +++ b/src/slider/detector.ts @@ -0,0 +1,280 @@ +import sharp from 'sharp'; +import { BoundingBox, RawImage } from './types'; +import { findCandidateBoxes } from './detection/candidate-search'; +import { calculateIoU } from './utils/geometry'; +import { SelfLearningSliderDetector } from './detector-self-learning'; + +type BoxColor = 'red' | 'blue' | 'green'; +const DEFAULT_DRAW_COLOR: BoxColor = 'blue'; + +export class SliderDetector { + private readonly selfLearning: SelfLearningSliderDetector; + + constructor(selfLearning?: SelfLearningSliderDetector) { + this.selfLearning = selfLearning ?? new SelfLearningSliderDetector(); + } + + async detectSlider( + imagePath: string, + outputPath?: string, + detectMultiple: boolean = true + ): Promise { + try { + const baseImage = sharp(imagePath); + + const [originalRaw, normalizedRaw] = await Promise.all([ + baseImage + .clone() + .raw() + .toBuffer({ resolveWithObject: true }), + baseImage + .clone() + .normalize() + .raw() + .toBuffer({ resolveWithObject: true }), + ]); + + const original = toRawImage(originalRaw.data, originalRaw.info); + const normalized = toRawImage(normalizedRaw.data, normalizedRaw.info); + + const candidates = await findCandidateBoxes({ + original, + normalized, + quantizationSource: baseImage.clone(), + }); + + if (candidates.length === 0) { + return null; + } + + const seedBox = candidates[0]; + + const edgeImage = await this.cannyEdge(baseImage.clone()); + const refinedSeed = await this.refineBox(seedBox, edgeImage); + + const detections: BoundingBox[] = [refinedSeed]; + + if (detectMultiple) { + const second = await this.selfLearning.detectSecondSlider( + imagePath, + refinedSeed + ); + if (second && calculateIoU(refinedSeed, second) < 0.5) { + detections.push(second); + } + } + + if (outputPath) { + await this.drawBoundingBoxes(imagePath, detections, outputPath, DEFAULT_DRAW_COLOR); + } + + return detections; + } catch (error) { + console.error(`Error detecting slider in ${imagePath}:`, error); + return null; + } + } + + async annotate( + imagePath: string, + boxes: BoundingBox[], + outputPath: string, + color: BoxColor = DEFAULT_DRAW_COLOR + ): Promise { + await this.drawBoundingBoxes(imagePath, boxes, outputPath, color); + } + + private async cannyEdge(image: sharp.Sharp): Promise { + const { data, info } = await image + .clone() + .grayscale() + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height } = info; + if (!width || !height) { + throw new Error('Cannot compute edges without image dimensions'); + } + + const sobelData = Buffer.alloc(width * height); + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const idx = y * width + x; + const gx = + -data[(y - 1) * width + (x - 1)] - + 2 * data[y * width + (x - 1)] - + data[(y + 1) * width + (x - 1)] + + data[(y - 1) * width + (x + 1)] + + 2 * data[y * width + (x + 1)] + + data[(y + 1) * width + (x + 1)]; + const gy = + -data[(y - 1) * width + (x - 1)] - + 2 * data[(y - 1) * width + x] - + data[(y - 1) * width + (x + 1)] + + data[(y + 1) * width + (x - 1)] + + 2 * data[(y + 1) * width + x] + + data[(y + 1) * width + (x + 1)]; + const magnitude = Math.sqrt(gx * gx + gy * gy); + sobelData[idx] = magnitude > 50 ? 255 : 0; + } + } + + return sharp(sobelData, { + raw: { width, height, channels: 1 }, + }); + } + + private async refineBox( + box: BoundingBox, + edgeImage: sharp.Sharp + ): Promise { + try { + const { data, info } = await edgeImage + .clone() + .extract({ + left: box.x, + top: box.y, + width: box.width, + height: box.height, + }) + .raw() + .toBuffer({ resolveWithObject: true }); + + const { width, height } = info; + + const projX = new Array(width).fill(0); + const projY = new Array(height).fill(0); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + const pixel = data[y * width + x]; + if (pixel > 0) { + projX[x]++; + projY[y]++; + } + } + } + + const findBounds = (proj: number[], minThreshold = 2) => { + let start = -1; + let end = -1; + + for (let i = 0; i < proj.length; i++) { + if (proj[i] >= minThreshold) { + if (start === -1) start = i; + end = i; + } + } + + if (start === -1) { + return { start: 0, end: proj.length - 1 }; + } + + let bestStart = start; + for (let i = start; i < Math.min(proj.length, start + 10); i++) { + if (proj[i] >= minThreshold) { + bestStart = i; + break; + } + } + + let bestEnd = end; + for (let i = end; i >= Math.max(0, end - 10); i--) { + if (proj[i] >= minThreshold) { + bestEnd = i; + break; + } + } + + return { start: bestStart, end: bestEnd }; + }; + + const { start: xStart, end: xEnd } = findBounds(projX); + const { start: yStart, end: yEnd } = findBounds(projY); + + const newX = box.x + xStart; + const newY = box.y + yStart; + const newWidth = xEnd - xStart + 1; + const newHeight = yEnd - yStart + 1; + + if ( + newWidth <= 10 || + newHeight <= 10 || + newWidth > box.width * 1.2 || + newHeight > box.height * 1.2 + ) { + return box; + } + + return { + x: newX, + y: newY, + width: newWidth, + height: newHeight, + score: box.score, + }; + } catch (error) { + console.error('[RefineBox] Failed to refine candidate, returning original box.', error); + return box; + } + } + + private async drawBoundingBoxes( + imagePath: string, + boxes: BoundingBox[], + outputPath: string, + color: BoxColor = 'blue' + ): Promise { + if (boxes.length === 0) { + return; + } + + const colorMap: Record = { + red: { r: 255, g: 0, b: 0 }, + blue: { r: 0, g: 0, b: 255 }, + green: { r: 0, g: 255, b: 0 }, + }; + + const rgb = colorMap[color]; + + const image = sharp(imagePath); + const metadata = await image.metadata(); + + if (!metadata.width || !metadata.height) { + throw new Error('Cannot draw bounding boxes without image dimensions'); + } + + const rectangles = boxes + .map( + (box) => ` + ` + ) + .join('\n'); + + const svg = Buffer.from( + ` + ${rectangles} + ` + ); + + await image + .composite([{ input: svg, top: 0, left: 0 }]) + .toFile(outputPath); + } +} + +function toRawImage(data: Buffer, info: sharp.OutputInfo): RawImage { + const { width, height, channels } = info; + if (!width || !height || !channels) { + throw new Error('Failed to read image metadata.'); + } + return { data, width, height, channels }; +} diff --git a/src/slider/index.ts b/src/slider/index.ts new file mode 100644 index 0000000..b37b287 --- /dev/null +++ b/src/slider/index.ts @@ -0,0 +1,6 @@ +export { SliderController } from './slider-controller'; +export { SliderDetector } from './detector'; +export { SliderValidator } from './validator'; +export { SelfLearningSliderDetector } from './detector-self-learning'; +export type { BoundingBox, Rectangle, RawImage } from './types'; +export type { SliderSolveResult } from './slider-controller'; diff --git a/src/slider/slider-controller.ts b/src/slider/slider-controller.ts new file mode 100644 index 0000000..792ba05 --- /dev/null +++ b/src/slider/slider-controller.ts @@ -0,0 +1,1140 @@ +import { Page, Locator } from 'playwright'; +import { SliderDetector } from './detector'; +import { BoundingBox } from './types'; +import path from 'path'; +import os from 'os'; +import fs from 'fs/promises'; +import sharp from 'sharp'; + +export interface SliderSolveResult { + success: boolean; + attempts: number; + distance?: number; +} + +/** + * 滑块验证控制器 + */ +export class SliderController { + private detector: SliderDetector; + private maxAttempts: number = 10; + + constructor(maxAttempts: number = 10) { + this.detector = new SliderDetector(); + this.maxAttempts = maxAttempts; + } + + /** + * 尝试解决滑块验证 + * @param page Playwright 页面对象 + * @param sliderSelector 滑块按钮选择器(可选,默认自动查找) + * @param captchaSelector 验证码容器选择器 + * @returns 解决结果 + */ + async solveSlider( + page: Page, + sliderSelector?: string, + captchaSelector: string = '#tcaptcha_iframe' + ): Promise { + console.log('[SliderController] 开始滑块验证'); + + // 等待滑块验证窗口完全加载 + console.log('[SliderController] 等待滑块元素加载...'); + await page.waitForTimeout(2000); // 增加到2秒 + + let actualSliderSelector: string | null = null; + let sliderElement: Locator | null = null; + let sliderInIframe = false; + + const sliderSearchResult = await this.waitForSliderButton( + page, + sliderSelector, + captchaSelector + ); + + if (!sliderSearchResult) { + console.warn('[SliderController] 未找到滑块按钮元素,等待超时'); + return { success: false, attempts: 0 }; + } + + sliderElement = sliderSearchResult.element; + actualSliderSelector = sliderSearchResult.selector; + sliderInIframe = sliderSearchResult.insideIframe; + + for (let attempt = 1; attempt <= this.maxAttempts; attempt++) { + console.log(`[SliderController] 第 ${attempt}/${this.maxAttempts} 次尝试`); + + try { + // 确保滑块按钮仍然可见,如若刷新后需要重新定位 + if ( + !sliderElement || + !(await sliderElement + .isVisible({ timeout: 500 }) + .catch(() => false)) + ) { + const relocalResult = await this.waitForSliderButton( + page, + sliderSelector, + captchaSelector + ); + if (!relocalResult) { + console.warn('[SliderController] 滑块按钮不再可见,终止自动验证'); + return { success: false, attempts: attempt - 1 }; + } + sliderElement = relocalResult.element; + actualSliderSelector = relocalResult.selector; + sliderInIframe = relocalResult.insideIframe; + } + + const sliderButton = sliderElement!; + + // 获取滑块按钮的边界框以计算其宽度 + const sliderButtonBox = await sliderButton.boundingBox({ timeout: 5000 }); + if (!sliderButtonBox) { + console.error('[SliderController] ✗ 无法获取滑块按钮的尺寸,跳过此次尝试'); + continue; + } + + let iframeBox: { x: number; y: number; width: number; height: number } | null = null; + if (sliderInIframe) { + try { + const frameElement = page.locator(captchaSelector).first(); + iframeBox = await frameElement.boundingBox(); + } catch (e) { + console.error('[SliderController] ✗ 无法获取 iframe 边界框,可能影响距离计算'); + } + } + + const trackBox = await this.locateTrackBox( + page, + captchaSelector, + sliderInIframe, + iframeBox + ); + + // 截图验证码区域 + const { path: screenshotPath, scaleX, scaleY } = await this.captureSlider(page, captchaSelector); + console.log(`[SliderController] 已截图: ${screenshotPath} (scaleX=${scaleX.toFixed(2)}, scaleY=${scaleY.toFixed(2)})`); + + // 检测滑块缺口 + console.log('[SliderController] 开始检测滑块缺口...'); + const { name, ext } = path.parse(screenshotPath); + const annotatedDir = path.resolve(process.cwd(), 'output'); + await fs.mkdir(annotatedDir, { recursive: true }); + const detectionOutputPath = path.join(annotatedDir, `${name}-detected${ext}`); + const boxes = await this.detector.detectSlider( + screenshotPath, + detectionOutputPath, + true + ); + + if (!boxes || boxes.length === 0) { + console.log('[SliderController] ✗ 未检测到滑块缺口,尝试刷新验证码'); + await this.refreshCaptcha(page); + await page.waitForTimeout(2000); // 等待刷新后加载 + continue; + } + + console.log( + `[SliderController] 检测结果图片: ${path.basename( + detectionOutputPath + )}` + ); + + console.log(`[SliderController] ✓ 检测到 ${boxes.length} 个滑块候选区域`); + const displayX = boxes[0].x / (scaleX || 1); + const displayY = boxes[0].y / (scaleY || 1); + const displayInfo = trackBox + ? `轨道起点=${trackBox.left.toFixed(2)}, 页面估算(x=${( + displayX + trackBox.left + ).toFixed(2)}, y=${displayY.toFixed(2)})` + : `页面估算(x=${displayX.toFixed(2)}, y=${displayY.toFixed(2)})`; + console.log( + `[SliderController] 第一个滑块位置: 图像坐标(x=${boxes[0].x}, y=${boxes[0].y}), ${displayInfo}` + ); + + // 计算滑动距离 + const distance = this.calculateDistance( + boxes, + sliderButtonBox, + iframeBox, + { x: scaleX, y: scaleY }, + trackBox + ); + console.log(`[SliderController] 计算滑动距离: ${distance}px`); + + // 执行滑动 + await this.dragSlider(page, sliderButton, distance); + + // 等待验证结果 - 最多等待3秒让窗口消失 + console.log('[SliderController] 等待验证结果...'); + let verificationSuccess = false; + + for (let i = 0; i < 6; i++) { // 6次 x 500ms = 3秒 + await page.waitForTimeout(500); + const isCaptchaVisible = await this.isCaptchaVisible(page, captchaSelector); + + if (!isCaptchaVisible) { + verificationSuccess = true; + console.log(`[SliderController] ✓ 滑块验证成功!(${(i + 1) * 500}ms后窗口消失)`); + return { success: true, attempts: attempt, distance }; + } + } + + // 3秒后仍然可见,认为验证失败 + if (!verificationSuccess) { + console.log('[SliderController] ✗ 验证失败,准备下一次尝试'); + } + + // 刷新验证码 + if (attempt < this.maxAttempts) { + await this.refreshCaptcha(page); + await page.waitForTimeout(1500); // 等待刷新加载 + } + + } catch (error) { + console.error(`[SliderController] 第 ${attempt} 次尝试出错:`, error); + + if (attempt < this.maxAttempts) { + await page.waitForTimeout(1000); + } + } + } + + console.log('[SliderController] ✗ 滑块验证失败,已达最大尝试次数'); + return { success: false, attempts: this.maxAttempts }; + } + + /** + * 截取滑块验证码区域 + */ + private async captureSlider( + page: Page, + captchaSelector: string + ): Promise<{ path: string; scaleX: number; scaleY: number }> { + const rawDir = path.resolve(process.cwd(), 'noflag'); + const annotatedDir = path.resolve(process.cwd(), 'output'); + await Promise.all([ + fs.mkdir(rawDir, { recursive: true }), + fs.mkdir(annotatedDir, { recursive: true }), + ]); + + const screenshotPath = path.join(rawDir, `captcha-${Date.now()}.png`); + + let captured = false; + let displayWidth: number | null = null; + let displayHeight: number | null = null; + + try { + const downloadAttempt = await this.tryDownloadOriginalImage( + page, + captchaSelector, + screenshotPath + ); + if (downloadAttempt.success) { + captured = true; + if ( + downloadAttempt.displayWidth && + downloadAttempt.displayWidth > 0 + ) { + displayWidth = downloadAttempt.displayWidth; + } + if ( + downloadAttempt.displayHeight && + downloadAttempt.displayHeight > 0 + ) { + displayHeight = downloadAttempt.displayHeight; + } + } + + const attemptCapture = async ( + description: string, + captureFn: () => Promise, + displaySize?: { width: number | null; height: number | null } + ): Promise => { + try { + await captureFn(); + + if (await this.isImageBlank(screenshotPath)) { + console.warn( + `[SliderController] ⚠️ ${description},但截图为空白,尝试其他方式` + ); + await fs.unlink(screenshotPath).catch(() => {}); + return false; + } + + if (displaySize) { + if (displaySize.width && displaySize.width > 0) { + displayWidth = displayWidth ?? displaySize.width; + } + if (displaySize.height && displaySize.height > 0) { + displayHeight = displayHeight ?? displaySize.height; + } + } + + console.log(`[SliderController] ✓ ${description}`); + return true; + } catch (error) { + console.log(`[SliderController] ✗ ${description}:`, error); + return false; + } + }; + + // 首先尝试在 iframe 内截取背景图片 + if (captchaSelector.includes('iframe')) { + try { + const iframe = page.frameLocator(captchaSelector); + const bgSelectors = ['#slideBg', '.tc-bg-img', 'canvas', 'img']; + + for (const selector of bgSelectors) { + try { + const bgElement = iframe.locator(selector).first(); + const bounding = await bgElement.boundingBox().catch(() => null); + const success = await attemptCapture( + `在 iframe 内截图成功: ${selector}`, + () => + bgElement.screenshot({ path: screenshotPath, timeout: 3000 }), + bounding + ? { width: bounding.width, height: bounding.height } + : undefined + ); + if (success) { + captured = true; + break; + } + } catch { + continue; + } + } + } catch (error) { + console.log('[SliderController] iframe 截图失败:', error); + } + } + + // 如果 iframe 方式失败,尝试在主页面查找 + if (!captured) { + const bgSelectors = [ + '#slideBg', + '.tc-bg-img', + 'img[id*="slide"]', + '.tcaptcha-transform img', + '[class*="bg-img"]' + ]; + + for (const selector of bgSelectors) { + try { + const bgElement = page.locator(selector).first(); + if (await bgElement.isVisible({ timeout: 2000 })) { + const success = await attemptCapture( + `在主页面截图成功: ${selector}`, + () => bgElement.screenshot({ path: screenshotPath }), + await bgElement + .boundingBox() + .catch(() => null) + .then((box) => + box ? { width: box.width, height: box.height } : undefined + ) + ); + if (success) { + captured = true; + break; + } + } + } catch { + continue; + } + } + } + + if (!captured) { + // 最后尝试截取整个验证码容器 + const captcha = page.locator(captchaSelector).first(); + try { + const box = await captcha.boundingBox(); + if (box) { + const padding = 0; + const success = await attemptCapture( + '使用页面截取验证码容器', + () => + page.screenshot({ + path: screenshotPath, + clip: { + x: Math.max(box.x - padding, 0), + y: Math.max(box.y - padding, 0), + width: box.width + padding * 2, + height: box.height + padding * 2, + }, + }), + { width: box.width, height: box.height } + ); + + captured = success; + + if (!success) { + const fallbackSuccess = await attemptCapture('直接通过容器截图', () => + captcha.screenshot({ path: screenshotPath }) + ); + captured = captured || fallbackSuccess; + } + } else { + const fallbackSuccess = await attemptCapture('直接通过容器截图', () => + captcha.screenshot({ path: screenshotPath }) + ); + captured = captured || fallbackSuccess; + } + } catch (error) { + console.error('[SliderController] ✗ 使用容器截图失败:', error); + throw error; + } + } + + if (captured && (await this.isImageBlank(screenshotPath))) { + console.warn('[SliderController] ⚠️ 最终截图仍为空白,可能无法进行检测'); + } + } catch (error) { + console.error('[SliderController] ✗ 截图失败:', error); + throw error; + } + + const sizeInfo = await this.ensureMinimumWidth(screenshotPath, 800); + const finalWidth = + sizeInfo.finalWidth ?? sizeInfo.originalWidth ?? displayWidth ?? 0; + const finalHeight = + sizeInfo.finalHeight ?? sizeInfo.originalHeight ?? displayHeight ?? 0; + + const effectiveDisplayWidth = + displayWidth && displayWidth > 0 ? displayWidth : finalWidth; + const effectiveDisplayHeight = + displayHeight && displayHeight > 0 ? displayHeight : finalHeight; + + const scaleX = + finalWidth > 0 && effectiveDisplayWidth > 0 + ? finalWidth / effectiveDisplayWidth + : 1; + const scaleY = + finalHeight > 0 && effectiveDisplayHeight > 0 + ? finalHeight / effectiveDisplayHeight + : 1; + + return { + path: screenshotPath, + scaleX, + scaleY, + }; + } + + private async isImageBlank(imagePath: string): Promise { + try { + const stats = await sharp(imagePath).stats(); + if (!stats?.channels?.length) { + return false; + } + const relevantChannels = + stats.channels.length > 3 + ? stats.channels.slice(0, 3) + : stats.channels; + + const dynamicRangeSmall = relevantChannels.every( + (channel) => (channel.max ?? 0) - (channel.min ?? 0) < 5 + ); + + const lowEntropy = (stats.entropy ?? 0) < 0.5; + + return dynamicRangeSmall && lowEntropy; + } catch (error) { + console.warn('[SliderController] 无法分析截图内容:', error); + return false; + } + } + + private async tryDownloadOriginalImage( + page: Page, + captchaSelector: string, + targetPath: string + ): Promise<{ + success: boolean; + displayWidth: number | null; + displayHeight: number | null; + }> { + const failure = { + success: false, + displayWidth: null, + displayHeight: null, + }; + + if (!captchaSelector.includes('iframe')) { + return failure; + } + + try { + const frameHandle = await page.$(captchaSelector); + const frame = await frameHandle?.contentFrame(); + await frameHandle?.dispose(); + + if (!frame) { + return failure; + } + + type CaptchaResource = + | { + kind: 'img' | 'background'; + source: string; + naturalWidth: number; + naturalHeight: number; + displayWidth: number; + displayHeight: number; + } + | { + kind: 'canvas'; + dataUrl: string; + naturalWidth: number; + naturalHeight: number; + displayWidth: number; + displayHeight: number; + } + | null; + + const resource = await frame.evaluate(() => { + const selectors = [ + '#slideBg', + '.tc-bg-img', + '.tc-bg-img img', + 'img[id*="slide"]', + 'img[id*="bg"]', + 'canvas', + ]; + + const parseBackground = (value: string | null): string | null => { + if (!value || !value.startsWith('url')) { + return null; + } + const match = value.match(/url\(["']?(.*?)["']?\)/); + return match ? match[1] : null; + }; + + for (const selector of selectors) { + const element = document.querySelector(selector); + if (!element) { + continue; + } + + try { + const rect = element.getBoundingClientRect(); + const displayWidth = Math.round(rect.width); + const displayHeight = Math.round(rect.height); + + if (element instanceof HTMLCanvasElement) { + return { + kind: 'canvas', + dataUrl: element.toDataURL('image/png'), + naturalWidth: element.width, + naturalHeight: element.height, + displayWidth, + displayHeight, + }; + } + + if (element instanceof HTMLImageElement) { + const src = element.currentSrc || element.src; + if (!src) { + continue; + } + + return { + kind: 'img', + source: src, + naturalWidth: element.naturalWidth || element.width, + naturalHeight: element.naturalHeight || element.height, + displayWidth, + displayHeight, + }; + } + + const backgroundImage = window + .getComputedStyle(element) + .getPropertyValue('background-image'); + const bgSrc = parseBackground(backgroundImage); + if (bgSrc) { + return { + kind: 'background', + source: bgSrc, + naturalWidth: displayWidth, + naturalHeight: displayHeight, + displayWidth, + displayHeight, + }; + } + } catch (error) { + console.warn('[SliderController] 解析验证码资源失败:', error); + continue; + } + } + + return null; + }); + + if (!resource) { + return failure; + } + + if (resource.kind === 'canvas') { + await this.writeDataUrlToFile(resource.dataUrl, targetPath); + console.log( + `[SliderController] ✓ 通过 canvas 导出原始验证码 (${resource.naturalWidth}x${resource.naturalHeight})` + ); + return { + success: true, + displayWidth: resource.displayWidth, + displayHeight: resource.displayHeight, + }; + } + + if ('source' in resource && resource.source) { + const buffer = await this.fetchImageBuffer(page, resource.source); + if (!buffer) { + return failure; + } + + await fs.writeFile(targetPath, buffer); + console.log( + `[SliderController] ✓ 直接下载验证码图片 (${resource.naturalWidth}x${resource.naturalHeight})` + ); + return { + success: true, + displayWidth: resource.displayWidth, + displayHeight: resource.displayHeight, + }; + } + + return failure; + } catch (error) { + console.log('[SliderController] 无法直接下载验证码图片:', error); + return failure; + } + } + + private async fetchImageBuffer(page: Page, url: string): Promise { + try { + if (url.startsWith('data:')) { + return this.dataUrlToBuffer(url); + } + + const response = await page.context().request.get(url, { + headers: { Referer: page.url() }, + }); + + if (!response.ok()) { + console.warn( + `[SliderController] 获取验证码原图失败: ${response.status()} ${response.statusText()}` + ); + return null; + } + + const body = await response.body(); + return Buffer.from(body); + } catch (error) { + console.warn('[SliderController] 请求验证码原图出错:', error); + return null; + } + } + + private async writeDataUrlToFile(dataUrl: string, targetPath: string): Promise { + const buffer = this.dataUrlToBuffer(dataUrl); + await fs.writeFile(targetPath, buffer); + } + + private dataUrlToBuffer(dataUrl: string): Buffer { + const commaIndex = dataUrl.indexOf(','); + const base64 = commaIndex >= 0 ? dataUrl.slice(commaIndex + 1) : dataUrl; + return Buffer.from(base64, 'base64'); + } + + private async ensureMinimumWidth( + imagePath: string, + minWidth: number + ): Promise<{ + multiplier: number; + originalWidth: number | null; + originalHeight: number | null; + finalWidth: number | null; + finalHeight: number | null; + }> { + try { + const metadata = await sharp(imagePath).metadata(); + const originalWidth = metadata.width ?? null; + const originalHeight = metadata.height ?? null; + + if (!originalWidth) { + return { + multiplier: 1, + originalWidth: null, + originalHeight, + finalWidth: null, + finalHeight: originalHeight, + }; + } + + if (originalWidth >= minWidth) { + const buffer = await sharp(imagePath) + .withMetadata({ density: 240 }) + .toBuffer(); + await fs.writeFile(imagePath, buffer); + + return { + multiplier: 1, + originalWidth, + originalHeight, + finalWidth: originalWidth, + finalHeight: originalHeight, + }; + } + + const ratio = minWidth / originalWidth; + const pipeline = sharp(imagePath).resize({ width: minWidth }); + const resizedBuffer = await pipeline + .withMetadata({ density: 240 }) + .toBuffer(); + await fs.writeFile(imagePath, resizedBuffer); + + const finalMeta = await sharp(imagePath).metadata(); + const finalWidth = finalMeta.width ?? minWidth; + const finalHeight = + finalMeta.height ?? + (originalHeight ? Math.round(originalHeight * ratio) : null); + + console.log( + `[SliderController] ✓ 已将截图扩展至 ${finalWidth}x${finalHeight ?? '未知'} (原始宽度 ${originalWidth}px)` + ); + + return { + multiplier: ratio, + originalWidth, + originalHeight, + finalWidth, + finalHeight, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.warn( + `[SliderController] ⚠️ 放大截图时出错 (${message}),继续使用原始尺寸` + ); + return { + multiplier: 1, + originalWidth: null, + originalHeight: null, + finalWidth: null, + finalHeight: null, + }; + } + } + + private async waitForSliderButton( + page: Page, + sliderSelector: string | undefined, + captchaSelector: string + ): Promise<{ element: Locator; selector: string; insideIframe: boolean } | null> { + const selectors = sliderSelector + ? [sliderSelector] + : [ + '.tc-fg-item', + '.tc-slider-normal', + '.tc-fg-item.tc-slider-normal', + '.tcaptcha_drag_button', + '#tcaptcha_drag_button', + '.tc-drag-button', + '[id*="drag"]', + '.slide-verify-slider-mask-item', + 'div[class*="drag"]', + '#tcaptcha_drag_thumb', + ]; + + const start = Date.now(); + const maxWaitMs = 15000; + const pollInterval = 500; + let lastProgressLog = 0; + let hasLoggedInitialWait = false; + + const logProgress = () => { + const elapsed = Date.now() - start; + if (!hasLoggedInitialWait) { + console.log('[SliderController] 滑块按钮尚未出现,等待加载...'); + hasLoggedInitialWait = true; + lastProgressLog = elapsed; + } else if (elapsed - lastProgressLog >= 2000) { + console.log( + `[SliderController] 滑块按钮仍在加载,已等待 ${(elapsed / 1000).toFixed(1)}s` + ); + lastProgressLog = elapsed; + } + }; + + while (Date.now() - start < maxWaitMs) { + if (captchaSelector.includes('iframe')) { + try { + const frameLocator = page.frameLocator(captchaSelector); + for (const selector of selectors) { + try { + const locator = frameLocator.locator(selector).first(); + if (await locator.isVisible({ timeout: 200 })) { + console.log(`[SliderController] ✓ 在 iframe 内找到滑块按钮: ${selector}`); + return { element: locator, selector, insideIframe: true }; + } + } catch { + continue; + } + } + } catch { + logProgress(); + await page.waitForTimeout(pollInterval); + continue; + } + } + + for (const selector of selectors) { + try { + const locator = page.locator(selector).first(); + if (await locator.isVisible({ timeout: 200 })) { + console.log(`[SliderController] ✓ 在主页面找到滑块按钮: ${selector}`); + return { element: locator, selector, insideIframe: false }; + } + } catch { + continue; + } + } + + logProgress(); + await page.waitForTimeout(pollInterval); + } + + return null; + } + + private async locateTrackBox( + page: Page, + captchaSelector: string, + insideIframe: boolean, + iframeBox: { x: number; y: number; width: number; height: number } | null + ): Promise<{ left: number; width: number } | null> { + const trackSelectors = [ + '#slideBg', + '.tc-bg-img', + '.tc-bg-img img', + '.tc-bg', + 'img[id*="slide"]', + 'img[id*="bg"]', + 'canvas', + ]; + + const resolveBox = async (locator: Locator) => { + try { + const box = await locator.boundingBox(); + if (!box || box.width <= 0) { + return null; + } + + let left = box.x; + if (insideIframe && iframeBox) { + left -= iframeBox.x; + } + return { left, width: box.width }; + } catch { + return null; + } + }; + + if (insideIframe) { + try { + const frameLocator = page.frameLocator(captchaSelector); + for (const selector of trackSelectors) { + const locator = frameLocator.locator(selector).first(); + const visible = await locator.isVisible({ timeout: 200 }).catch(() => false); + if (!visible) { + continue; + } + const box = await resolveBox(locator); + if (box) { + return box; + } + } + } catch { + return null; + } + } else { + for (const selector of trackSelectors) { + const locator = page.locator(selector).first(); + const visible = await locator.isVisible({ timeout: 200 }).catch(() => false); + if (!visible) { + continue; + } + const box = await resolveBox(locator); + if (box) { + return box; + } + } + } + + return null; + } + + private calculateDistance( + boxes: BoundingBox[], + sliderButtonBox: { x: number; width: number }, + iframeBox: { x: number; y: number } | null, + scale: { x: number; y: number }, + trackBox: { left: number; width: number } | null + ): number { + const scaleX = scale.x || 1; + + // 按x坐标排序,左边的是b1(滑块),右边的是b2(缺口) + const sorted = [...boxes].sort((a, b) => a.x - b.x); + + console.log(`[SliderController] 检测到 ${boxes.length} 个滑块`); + sorted.forEach((box, i) => { + console.log(` - 滑块${i}: x=${box.x}, width=${box.width}, score=${box.score.toFixed(2)}`); + }); + + let distance: number; + + if (sorted.length >= 2) { + // 双滑块模式:最简单、最准确的计算 + // 移动距离 = (b2左边界 - b1左边界) / scaleX + const b1 = sorted[0]; // 左边的滑块 + const b2 = sorted[1]; // 右边的缺口 + + distance = (b2.x - b1.x) / scaleX; + + console.log(`[SliderController] 双滑块模式:`); + console.log(` - b1(滑块)左边界: ${b1.x}px (图像坐标)`); + console.log(` - b2(缺口)左边界: ${b2.x}px (图像坐标)`); + console.log(` - scaleX: ${scaleX}`); + console.log(` - 移动距离 = (${b2.x} - ${b1.x}) / ${scaleX} = ${distance.toFixed(2)}px`); + } else { + // 单滑块模式:只检测到缺口,需要从DOM获取滑块位置 + const target = sorted[0]; + const sliderLeft = iframeBox ? sliderButtonBox.x - iframeBox.x : sliderButtonBox.x; + const trackLeft = trackBox ? trackBox.left : 0; + + // 缺口在页面上的位置 = 轨道左边界 + 缺口在图像中的x / scaleX + const targetLeft = trackLeft + target.x / scaleX; + + distance = targetLeft - sliderLeft; + + console.log(`[SliderController] 单滑块模式:`); + console.log(` - 缺口图像x: ${target.x}px`); + console.log(` - 滑块页面x: ${sliderLeft.toFixed(2)}px (iframe内)`); + console.log(` - 轨道左边界: ${trackLeft.toFixed(2)}px`); + console.log(` - 缺口页面x: ${targetLeft.toFixed(2)}px`); + console.log(` - 移动距离 = ${targetLeft.toFixed(2)} - ${sliderLeft.toFixed(2)} = ${distance.toFixed(2)}px`); + } + + // 添加小的随机偏移,模拟人类行为 + const randomOffset = (Math.random() - 0.5) * 3; + const finalDistance = Math.max(0, distance + randomOffset); + + console.log(`[SliderController] 最终移动距离: ${finalDistance.toFixed(2)}px (含随机偏移${randomOffset.toFixed(2)}px)`); + + return finalDistance; + } + + /** + * 执行滑块拖动 + */ + private async dragSlider(page: Page, sliderButton: any, distance: number): Promise { + console.log(`[SliderController] 开始拖动滑块 ${distance.toFixed(2)}px`); + + try { + const box = await sliderButton.boundingBox({ timeout: 5000 }); + if (!box) { + throw new Error('无法获取滑块位置'); + } + + // 滑块起始位置 + const startX = box.x + box.width / 2; + const startY = box.y + box.height / 2; + + // 移动鼠标到滑块中心 + await page.mouse.move(startX, startY, { steps: 5 }); + await page.waitForTimeout(200 + Math.random() * 100); + + // 按下鼠标 + await page.mouse.down(); + await page.waitForTimeout(150 + Math.random() * 100); + + // 模拟人类滑动轨迹 + const endX = startX + distance; + await page.mouse.move(endX, startY + (Math.random() - 0.5) * 8, { steps: 20 + Math.floor(Math.random() * 10) }); + await page.waitForTimeout(250 + Math.random() * 150); + + // 释放鼠标 + await page.mouse.up(); + + console.log('[SliderController] 滑块拖动完成'); + } catch (error) { + console.error('[SliderController] 拖动滑块时出错:', error); + throw error; + } + } + + /** + * 生成类人滑动轨迹 + */ + private generateHumanLikeTrack(distance: number, steps: number): number[] { + const tracks: number[] = []; + let current = 0; + + // 加速阶段 (前 2/5) + const accelerateSteps = Math.floor(steps * 0.4); + for (let i = 0; i < accelerateSteps; i++) { + const progress = i / accelerateSteps; + const move = distance * 0.6 * (progress ** 2); + tracks.push(move); + current = move; + } + + // 匀速阶段 (中间 2/5) + const constantSteps = Math.floor(steps * 0.4); + const constantSpeed = (distance * 0.35) / constantSteps; + for (let i = 0; i < constantSteps; i++) { + current += constantSpeed; + tracks.push(current); + } + + // 减速阶段 (最后 1/5) + const decelerateSteps = steps - accelerateSteps - constantSteps; + const remaining = distance - current; + for (let i = 0; i < decelerateSteps; i++) { + const progress = 1 - (i / decelerateSteps); + current += remaining * progress / decelerateSteps; + tracks.push(current); + } + + // 确保最后到达目标位置 + tracks[tracks.length - 1] = distance; + + return tracks; + } + + /** + * 刷新验证码 + */ + private async refreshCaptcha(page: Page): Promise { + try { + // 先在 iframe 内查找刷新按钮 + const iframeSelectors = [ + 'iframe[src*="captcha"]', + 'iframe[id*="captcha"]', + 'iframe[id*="tcaptcha"]' + ]; + + for (const iframeSelector of iframeSelectors) { + try { + const frame = page.frameLocator(iframeSelector); + const refreshSelectors = [ + // 腾讯云刷新按钮(根据 alt 属性) + 'img.tc-action-icon[alt="刷新验证"]', + '.tc-action-icon[alt*="刷新"]', + // 其他可能的选择器 + '.tc-action-icon', + '.tcaptcha_refresh', + '.tc-refresh', + ]; + + for (const selector of refreshSelectors) { + try { + const refreshBtn = frame.locator(selector).first(); + await refreshBtn.click({ timeout: 2000 }); + console.log(`[SliderController] ✓ 在 iframe 内点击刷新按钮: ${selector}`); + await page.waitForTimeout(1500); // 等待新图片加载 + return; + } catch { + continue; + } + } + } catch { + continue; + } + } + + // 如果 iframe 内没找到,尝试主页面 + const mainRefreshSelectors = [ + '#reload', + '[class*="refresh"]', + '[id*="refresh"]' + ]; + + for (const selector of mainRefreshSelectors) { + try { + const refreshBtn = page.locator(selector).first(); + if (await refreshBtn.isVisible({ timeout: 2000 })) { + await refreshBtn.click(); + console.log(`[SliderController] ✓ 在主页面点击刷新按钮: ${selector}`); + await page.waitForTimeout(1500); + return; + } + } catch { + continue; + } + } + + console.log('[SliderController] ✗ 未找到刷新按钮'); + } catch (error) { + console.log('[SliderController] ✗ 刷新验证码失败:', error); + } + } + + /** + * 检查验证码是否仍然可见(验证是否成功) + */ + private async isCaptchaVisible(page: Page, captchaSelector: string): Promise { + try { + // 方法1: 检查 iframe 容器是否消失 + const captchaContainer = page.locator(captchaSelector).first(); + const containerVisible = await captchaContainer.isVisible({ timeout: 1000 }).catch(() => false); + + if (!containerVisible) { + console.log('[SliderController] ✓ 验证码容器已消失'); + return false; // 容器消失 = 验证成功 + } + + // 方法2: 检查 iframe 内是否有滑块按钮(如果按钮消失可能是成功了) + if (captchaSelector.includes('iframe')) { + try { + const iframe = page.frameLocator(captchaSelector); + const sliderButton = iframe.locator('.tc-fg-item, .tc-slider-normal').first(); + const buttonExists = await sliderButton.isVisible({ timeout: 500 }).catch(() => false); + + if (!buttonExists) { + console.log('[SliderController] ✓ 滑块按钮已消失'); + return false; // 按钮消失 = 验证成功 + } + + // 方法3: 检查是否有成功标识 + const successIndicators = [ + '.tc-jqpuzzle-success', + '[class*="success"]', + '.tc-verify-success' + ]; + + for (const selector of successIndicators) { + const successEl = iframe.locator(selector).first(); + const hasSuccess = await successEl.isVisible({ timeout: 300 }).catch(() => false); + if (hasSuccess) { + console.log(`[SliderController] ✓ 检测到成功标识: ${selector}`); + return false; // 有成功标识 = 验证成功 + } + } + } catch { + // iframe 访问失败也可能意味着已经消失 + console.log('[SliderController] ✓ 无法访问 iframe(可能已关闭)'); + return false; + } + } + + // 都没有检测到成功迹象,认为仍然可见 + return true; + } catch (error) { + console.log('[SliderController] 检查验证码状态时出错:', error); + return false; // 出错时假设已成功 + } + } +} diff --git a/src/slider/types.ts b/src/slider/types.ts new file mode 100644 index 0000000..7d9cd93 --- /dev/null +++ b/src/slider/types.ts @@ -0,0 +1,17 @@ +export interface Rectangle { + x: number; + y: number; + width: number; + height: number; +} + +export interface BoundingBox extends Rectangle { + score: number; +} + +export interface RawImage { + data: Buffer; + width: number; + height: number; + channels: number; +} diff --git a/src/slider/utils/geometry.ts b/src/slider/utils/geometry.ts new file mode 100644 index 0000000..04f3bbb --- /dev/null +++ b/src/slider/utils/geometry.ts @@ -0,0 +1,16 @@ +import { Rectangle } from '../types'; + +/** + * Calculate intersection over union for two bounding boxes. + */ +export function calculateIoU(a: Rectangle, b: Rectangle): number { + const x1 = Math.max(a.x, b.x); + const y1 = Math.max(a.y, b.y); + const x2 = Math.min(a.x + a.width, b.x + b.width); + const y2 = Math.min(a.y + a.height, b.y + b.height); + + const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1); + const union = a.width * a.height + b.width * b.height - intersection; + + return union === 0 ? 0 : intersection / union; +} diff --git a/src/slider/utils/image.ts b/src/slider/utils/image.ts new file mode 100644 index 0000000..2b1c37f --- /dev/null +++ b/src/slider/utils/image.ts @@ -0,0 +1,136 @@ +import { RawImage } from '../types'; + +/** + * Convert RGB data to grayscale array. + */ +function toGrayscale( + data: Buffer, + width: number, + height: number, + channels: number +): Uint8Array { + const gray = new Uint8Array(width * height); + for (let i = 0; i < width * height; i++) { + const idx = i * channels; + gray[i] = Math.round( + data[idx] * 0.299 + data[idx + 1] * 0.587 + data[idx + 2] * 0.114 + ); + } + return gray; +} + +/** + * Produce a Sobel edge map from raw RGB data. + */ +export function createEdgeMap({ + data, + width, + height, + channels, +}: RawImage): Uint8Array { + const gray = toGrayscale(data, width, height, channels); + const edges = new Uint8Array(width * height); + + for (let y = 1; y < height - 1; y++) { + for (let x = 1; x < width - 1; x++) { + const idx = y * width + x; + const gx = + -gray[(y - 1) * width + (x - 1)] + + gray[(y - 1) * width + (x + 1)] - + 2 * gray[idx - 1] + + 2 * gray[idx + 1] - + gray[(y + 1) * width + (x - 1)] + + gray[(y + 1) * width + (x + 1)]; + + const gy = + -gray[(y - 1) * width + (x - 1)] - + 2 * gray[(y - 1) * width + x] - + gray[(y - 1) * width + (x + 1)] + + gray[(y + 1) * width + (x - 1)] + + 2 * gray[(y + 1) * width + x] + + gray[(y + 1) * width + (x + 1)]; + + const magnitude = Math.sqrt(gx * gx + gy * gy); + edges[idx] = magnitude > 40 ? 1 : 0; + } + } + + return edges; +} + +/** + * Morphological closing (dilate followed by erode). + */ +export function morphologyClose( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number +): Uint8Array { + const dilated = dilate(binary, width, height, kernelSize); + return erode(dilated, width, height, kernelSize); +} + +export function dilate( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number +): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let maxVal = 0; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + maxVal = Math.max(maxVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = maxVal; + } + } + + return result; +} + +export function erode( + binary: Uint8Array, + width: number, + height: number, + kernelSize: number +): Uint8Array { + const result = new Uint8Array(width * height); + const offset = Math.floor(kernelSize / 2); + + for (let y = 0; y < height; y++) { + for (let x = 0; x < width; x++) { + let minVal = 1; + + for (let ky = -offset; ky <= offset; ky++) { + for (let kx = -offset; kx <= offset; kx++) { + const ny = y + ky; + const nx = x + kx; + + if (nx >= 0 && nx < width && ny >= 0 && ny < height) { + minVal = Math.min(minVal, binary[ny * width + nx]); + } + } + } + + result[y * width + x] = minVal; + } + } + + return result; +} + +export { toGrayscale }; diff --git a/src/slider/validator.ts b/src/slider/validator.ts new file mode 100644 index 0000000..c6f7fd4 --- /dev/null +++ b/src/slider/validator.ts @@ -0,0 +1,105 @@ +import { BoundingBox, Rectangle } from './types'; +import { calculateIoU } from './utils/geometry'; + +class SliderValidator { + + /** + * 检查两个框是否匹配(允许一定偏差) + */ + isBoxMatching(detected: Rectangle, target: Rectangle, tolerance: number = 10): boolean { + // 计算中心点 + const detectedCenterX = detected.x + detected.width / 2; + const detectedCenterY = detected.y + detected.height / 2; + const targetCenterX = target.x + target.width / 2; + const targetCenterY = target.y + target.height / 2; + + // 中心点距离 + const centerDistance = Math.sqrt( + Math.pow(detectedCenterX - targetCenterX, 2) + + Math.pow(detectedCenterY - targetCenterY, 2) + ); + + // 尺寸差异 - 允许更大的容差,因为形态学操作可能改变大小 + const widthDiff = Math.abs(detected.width - target.width); + const heightDiff = Math.abs(detected.height - target.height); + + // 如果中心点距离小于容差,且尺寸差异不太大,认为匹配 + // 放宽尺寸容差到30px(考虑到形态学操作的影响) + return centerDistance <= tolerance && widthDiff <= 30 && heightDiff <= 30; + } + + /** + * 计算IoU(交并比) + */ + calculateIoU(box1: Rectangle, box2: Rectangle): number { + return calculateIoU(box1, box2); + } + + /** + * 验证检测结果 + */ + async validateDetection( + detectedBoxes: Rectangle[], + targetBoxes: Rectangle[], + tolerance: number = 10 + ): Promise<{ + totalTargets: number; + detectedCount: number; + matchedCount: number; + precision: number; + recall: number; + matches: Array<{ detected: Rectangle; target: Rectangle; iou: number }>; + unmatched: Rectangle[]; + }> { + const matches: Array<{ detected: Rectangle; target: Rectangle; iou: number }> = []; + const matchedTargets = new Set(); + const matchedDetected = new Set(); + + // 1. 找出所有可能的匹配对 + const potentialMatches: Array<{ detIdx: number; tarIdx: number; iou: number }> = []; + for (let i = 0; i < detectedBoxes.length; i++) { + for (let j = 0; j < targetBoxes.length; j++) { + if (this.isBoxMatching(detectedBoxes[i], targetBoxes[j], tolerance)) { + const iou = this.calculateIoU(detectedBoxes[i], targetBoxes[j]); + if (iou > 0.1) { // 设置一个IoU的下限 + potentialMatches.push({ detIdx: i, tarIdx: j, iou }); + } + } + } + } + + // 2. 按IoU从高到低排序 + potentialMatches.sort((a, b) => b.iou - a.iou); + + // 3. 贪心选择最佳匹配 + for (const match of potentialMatches) { + if (!matchedDetected.has(match.detIdx) && !matchedTargets.has(match.tarIdx)) { + matches.push({ + detected: detectedBoxes[match.detIdx], + target: targetBoxes[match.tarIdx], + iou: match.iou + }); + matchedDetected.add(match.detIdx); + matchedTargets.add(match.tarIdx); + } + } + + // 未匹配的检测框 + const unmatched = detectedBoxes.filter((_, i) => !matchedDetected.has(i)); + + const precision = detectedBoxes.length > 0 ? matches.length / detectedBoxes.length : 0; + const recall = targetBoxes.length > 0 ? matches.length / targetBoxes.length : 0; + + return { + totalTargets: targetBoxes.length, + detectedCount: detectedBoxes.length, + matchedCount: matches.length, + precision, + recall, + matches, + unmatched + }; + } +} + +export { SliderValidator, BoundingBox, Rectangle }; diff --git a/todolist.md b/todolist.md new file mode 100644 index 0000000..127dc03 --- /dev/null +++ b/todolist.md @@ -0,0 +1,19 @@ + +## todo +1. 滑块检测,自动验证 +2. 短信自动提取 + +## html +整个浮窗代码: +
AI生成背景
success
&nbsp;slider
+ +尝试不成功点击刷新,换一张图片重试。刷新html: +刷新验证 + + +拖动按钮html: +
&nbsp;slider
+ + +:q +