Commit a60a703a authored by 曹金梅

Minor update

parent a3f758ec
# test-jiwei-web
The QA team uses Midscene to test the 爱集微 (ijiwei) official website.
\ No newline at end of file
.DS_Store
node_modules/
/test-results/
/playwright-report/
/blob-report/
/playwright/.cache/
/command-line/*.json
# Midscene.js dump files
**/midscene_run/report/**
**/midscene_run/dump/
**/midscene_run/log/
playwright-report/
dist/
test-results/
.env
output/
.DS_Store
pnpm-lock.yaml
# Midscene.js dump files
**/midscene_run/midscene-report
**/midscene_run/dump-logger
# Midscene Examples
Examples for [Midscene.js](https://github.com/web-infra-dev/midscene).
## Examples
Here are some examples you can refer to:
### Web Browser
- [Automate with Scripts in YAML](./yaml-scripts-demo/): Automate with scripts in YAML. This is the easiest way to integrate Midscene with your existing project.
- [Integrate with Playwright](./playwright-demo/): Integrate Midscene with Playwright, including ai action, query, cache, and report.
- [Integrate with Puppeteer](./puppeteer-demo/): Integrate Midscene with Puppeteer, including ai action, query, cache, and report.
- [Integrate with Puppeteer and Vitest](./puppeteer-with-vitest-demo/): Integrate Midscene with Puppeteer and Vitest. This is an alternative way to run tests without Playwright.
- [Bridge Mode](./bridge-mode-demo/): Bridge Mode allows you to use Midscene with your desktop Chrome.
### Android
- [JavaScript SDK Demo](./android/javascript-sdk-demo/): Integrate Midscene with Android, including ai action, query, cache, and report.
- [Vitest Demo](./android/vitest-demo/): Integrate Midscene with Android and Vitest.
- [YAML Scripts Demo](./android/yaml-scripts-demo/): Automate Android with scripts in YAML. This is the easiest way to integrate Midscene with your existing Android project.
## Connectivity Test
- [Connectivity Test](./connectivity-test/): Use this folder to test the connectivity of the LLM Service.
## Feedback
To open an issue, please go to [https://github.com/web-infra-dev/midscene/issues](https://github.com/web-infra-dev/midscene/issues).
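All of these examples share the same agent pattern: wrap a page (or device) with an agent, then drive it with `aiAction`, extract data with `aiQuery`, and verify with `aiAssert`. A minimal sketch using the Puppeteer integration (the exact prompts here are illustrative; see the demos above for full versions, and assume `@midscene/web`, `puppeteer`, and an `OPENAI_API_KEY` in `.env`):

```typescript
// Minimal sketch of the shared pattern. Run with: npx tsx sketch.ts
import puppeteer from "puppeteer";
import { PuppeteerAgent } from "@midscene/web/puppeteer";
import "dotenv/config"; // read OPENAI_API_KEY from .env

Promise.resolve(
  (async () => {
    const browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto("https://www.ebay.com");

    const agent = new PuppeteerAgent(page); // wrap the page with an AI agent
    await agent.aiAction('type "Headphones" in search box, hit Enter'); // act
    const items = await agent.aiQuery("{itemTitle: string}[], item titles in the result list"); // extract
    console.log(items);
    await agent.aiAssert("There is a category filter on the left"); // assert
    await browser.close();
  })()
);
```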
package-lock.json
.env
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
midscene_run/cache
# Android Demo
This demo shows how to use Midscene to automate tasks on Android.
If you want to use Android with Vitest, please refer to [android-with-vitest-demo](../android-with-vitest-demo).
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Run demo
```bash
npm install
# run demo.ts
npx tsx demo.ts
# run demo with a `.runYaml` call
npx tsx demo-run-yaml.ts
```
# Reference
https://midscenejs.com/integrate-with-puppeteer.html
https://midscenejs.com/api.html
import { AndroidAgent, AndroidDevice, getConnectedDevices } from '@midscene/android';
import "dotenv/config";
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const devices = await getConnectedDevices();
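// assumption: at least one device is listed by `adb devices`; the first one is used below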
const page = new AndroidDevice(devices[0].udid);
// 👀 init Midscene agent
const agent = new AndroidAgent(page, {
aiActionContext:
'If any location, permission, user agreement, etc. popup, click agree. If login page pops up, close it.',
});
await page.connect();
await page.launch('https://www.ebay.com');
await sleep(5000);
// 👀 run YAML with agent
const { result } = await agent.runYaml(`
tasks:
- name: search
flow:
- ai: input 'Headphones' in search box, click search button
- sleep: 3000
- name: query
flow:
- aiQuery: "{itemTitle: string, price: Number}[], find item in list and corresponding price"
name: headphones
- aiNumber: "What is the price of the first headphone?"
- aiBoolean: "Is the price of the headphones more than 1000?"
- aiString: "What is the name of the first headphone?"
- aiLocate: "What is the location of the first headphone?"
`);
console.log(result);
})()
);
import { AndroidAgent, AndroidDevice, getConnectedDevices } from '@midscene/android';
import "dotenv/config"; // read environment variables from .env file
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const devices = await getConnectedDevices();
const page = new AndroidDevice(devices[0].udid);
// 👀 init Midscene agent
const agent = new AndroidAgent(page, {
aiActionContext:
'If any location, permission, user agreement, etc. popup, click agree. If login page pops up, close it.',
});
await page.connect();
await page.launch('https://www.ebay.com');
await sleep(5000);
// 👀 type keywords, perform a search
await agent.aiAction('type "Headphones" in search box, hit Enter');
// 👀 wait for the loading
await agent.aiWaitFor("there is at least one headphone item on page");
// or you may use a plain sleep:
// await sleep(5000);
// 👀 understand the page content, find the items
const items = await agent.aiQuery(
"{itemTitle: string, price: Number}[], find item in list and corresponding price"
);
console.log("headphones in stock", items);
const isMoreThan1000 = await agent.aiBoolean("Is the price of the headphones more than 1000?");
console.log("isMoreThan1000", isMoreThan1000);
const price = await agent.aiNumber("What is the price of the first headphone?");
console.log("price", price);
const name = await agent.aiString("What is the name of the first headphone?");
console.log("name", name);
const location = await agent.aiLocate("What is the location of the first headphone?");
console.log("location", location);
// 👀 assert by AI
await agent.aiAssert("There is a category filter on the left");
})()
);
{
"name": "android-demo",
"private": true,
"version": "1.0.0",
"description": "> quick start",
"main": "index.js",
"type": "module",
"scripts": {
"test": "tsx demo.ts",
"test-yaml": "tsx demo-run-yaml.ts"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/android": "latest",
"dotenv": "^16.4.5",
"tsx": "4.20.1"
}
}
\ No newline at end of file
# Midscene.js dump files
midscene_run/
> Midscene x adb is still under development. You may use this demo if you want early access.
# Android demo
This demo shows how to use adb to control an Android device for automation tasks.
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Connect an Android device with [adb](https://developer.android.com/tools/adb).
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Install
install deps
```bash
npm install
```
### Run
Case 1:
```bash
npm run test -- setting.test.ts
```
or Case 2:
```bash
npm run test -- todo.test.ts
```
# Reference
https://midscenejs.com/api
\ No newline at end of file
{
"name": "android-with-vitest-demo",
"private": true,
"version": "1.0.0",
"main": "index.js",
"type": "module",
"scripts": {
"test": "vitest --run"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/android": "latest",
"@types/node": "^18.0.0",
"dotenv": "^16.4.5",
"vitest": "^2.1.8"
}
}
\ No newline at end of file
import { agentFromAdbDevice, getConnectedDevices } from '@midscene/android';
import { describe, it, vi } from 'vitest';
import 'dotenv/config'; // read environment variables from .env file
vi.setConfig({
testTimeout: 90 * 1000,
});
describe(
'android integration',
async () => {
await it('Android settings page demo for scroll', async () => {
const devices = await getConnectedDevices();
const agent = await agentFromAdbDevice(devices[0].udid, {
aiActionContext:
'If any location, permission, user agreement, etc. popup, click agree. If login page pops up, close it.',
});
await agent.launch('com.android.settings/.Settings');
await agent.aiAction('scroll list to bottom');
await agent.aiAction('open "More settings"');
await agent.aiAction('scroll list to bottom');
await agent.aiAction('scroll list to top');
await agent.aiAction('swipe down one screen');
await agent.aiAction('swipe up one screen');
});
},
360 * 1000,
);
import { AndroidAgent, AndroidDevice, getConnectedDevices } from '@midscene/android';
import { beforeAll, describe, expect, it, vi } from 'vitest';
import 'dotenv/config'; // read environment variables from .env file
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
vi.setConfig({
testTimeout: 240 * 1000,
});
const pageUrl = 'https://todomvc.com/examples/react/dist/';
describe('Test todo list', () => {
let agent: AndroidAgent;
beforeAll(async () => {
const devices = await getConnectedDevices();
const page = new AndroidDevice(devices[0].udid);
agent = new AndroidAgent(page, {
aiActionContext:
'If any location, permission, user agreement, etc. popup, click agree. If login page pops up, close it.',
});
await page.connect();
await page.launch(pageUrl);
await sleep(3000);
});
it(
'ai todo',
async () => {
await agent.aiAction(
"type 'Study JS today' in the task box input and press the Enter key",
);
await agent.aiAction(
"type 'Study Rust tomorrow' in the task box input and press the Enter key",
);
await agent.aiAction(
"type 'Study AI the day after tomorrow' in the task box input and press the Enter key",
);
await agent.aiAction(
'move the mouse to the second item in the task list and click the delete button on the right of the second task',
);
await agent.aiAction(
'click the check button on the left of the second task',
);
await agent.aiAction(
"click the 'completed' status button below the task list",
);
const list = await agent.aiQuery('string[], the complete task list');
expect(list.length).toEqual(1);
await agent.aiAssert(
'Near the bottom of the list, there is a tip shows "1 item left".',
);
const name = await agent.aiString('What is the name of the first todo?');
console.log('name', name);
const todoCount = await agent.aiNumber('How many todos are there in the list?');
console.log('todoCount', todoCount);
const isAllCompleted = await agent.aiBoolean('Are all todos completed?');
console.log('isAllCompleted', isAllCompleted);
const location = await agent.aiLocate('What is the location of the first todo?');
console.log('location', location);
},
720 * 1000,
);
});
output/
package-lock.json
.env
# Midscene.js dump files
midscene_run/report
midscene_run/dump
midscene_run/tmp
\ No newline at end of file
# Yaml Scripts
## Preparation
create `.env` file
```shell
# replace with your own gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
## Install
Ensure that Node.js is installed, then install `@midscene/cli` globally
```shell
npm i -g @midscene/cli
```
## Run
Run all scripts
> On Windows, replace `./` with `.\`, e.g. `midscene .\midscene-scripts\`.
```shell
midscene ./midscene-scripts/
```
Maps navigation demo
```shell
midscene ./midscene-scripts/maps-navigation.yaml
```
Twitter auto-like demo
```shell
midscene ./midscene-scripts/twitter-auto-like.yaml
```
Perform a search on ebay.com
```shell
midscene ./midscene-scripts/search-headphone-on-ebay.yaml
```
# Reference
https://midscenejs.com/automate-with-scripts-in-yaml.html
\ No newline at end of file
# Midscene.js dump files
midscene_run/report
midscene_run/dump
midscene_run/tmp
# open the Maps app, search for attractions, and start navigation
android:
# launch: https://www.ebay.com
deviceId: s4ey59ytbitot4yp
tasks:
- name: travel
flow:
- aiAction: open Maps app
- aiAction: input 'attractions' in the search bar, and click the search button
- aiAction: click the first search result, enter the attraction details page
- aiAction: click "Directions" button, enter the route planning page
- aiAction: click "Start" button to start navigation
# search headphone on ebay, extract the items info into a json file, and assert the Filter button
android:
# launch: https://www.ebay.com
deviceId: s4ey59ytbitot4yp
tasks:
- name: search headphones
flow:
- aiAction: open browser and navigate to ebay.com
- aiAction: type 'Headphones' in ebay search box, hit Enter
- sleep: 5000
- aiAction: scroll down the page for 800px
- name: extract headphones info
flow:
- aiQuery: >
{name: string, price: number, subTitle: string}[], return item name, price and the subTitle on the lower right corner of each item
name: headphones
- aiNumber: "What is the price of the first headphone?"
- aiBoolean: "Is the price of the headphones more than 1000?"
- aiString: "What is the name of the first headphone?"
- aiLocate: "What is the location of the first headphone?"
- name: assert Filter button
flow:
- aiAssert: There is a Filter button on the page
\ No newline at end of file
# like tweets from the 'midscene ai' account, extract tweet info, and assert the @midscene_ai account
android:
deviceId: s4ey59ytbitot4yp
tasks:
- name: like tweets
flow:
- aiAction: open x app
- aiAction: search 'midscene ai'
- aiAction: click the user called 'midscene ai'
- aiAction: click the first tweet
- aiAction: click the like button
- name: extract tweets info
flow:
- aiQuery: >
{time: string, content: string}[], return time and content of each tweet
name: tweets
- name: assert @midscene_ai account
flow:
- aiAssert: There is a @midscene_ai account on the page
\ No newline at end of file
{
"name": "yaml-scripts-demo",
"private": true,
"version": "1.0.0",
"description": "> quick start",
"main": "index.js",
"type": "module",
"scripts": {
"test": "midscene ./midscene-scripts"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/cli": "latest"
}
}
\ No newline at end of file
package-lock.json
.env
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
midscene_run/cache
# Bridge mode demo
This demo shows how to use bridge mode to control pages in your desktop Chrome.
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Install
Install the Midscene extension from the Chrome Web Store: [Midscene](https://chromewebstore.google.com/detail/midscene/gbldofcpkknbggpkmbdaefngejllnief)
install deps
```bash
npm install
```
### Run
Remember to click the "Allow connection" button in the Chrome extension while running.
Run demo to connect to a new tab.
```bash
npm run demo-new-tab
```
Run demo to connect to the active tab.
```bash
npm run demo-current-tab
```
# Reference
https://midscenejs.com/bridge-mode-by-chrome-extension
https://midscenejs.com/api
\ No newline at end of file
import "dotenv/config"; // read environment variables from .env file
import { AgentOverChromeBridge } from "@midscene/web/bridge-mode";
Promise.resolve(
(async () => {
const agent = new AgentOverChromeBridge();
// This will connect to **the current active tab** on your desktop Chrome
// remember to open the Chrome extension and click the 'Allow connection' button.
await agent.connectCurrentTab();
// Once connected, you will see this log; otherwise you will get a timeout error.
console.log("connected to the active tab!");
const content = await agent.aiQuery(
"what is the title of the page? answer in {title: string}"
);
console.log(content);
await agent.destroy();
})()
);
import "dotenv/config"; // read environment variables from .env file
import { AgentOverChromeBridge } from "@midscene/web/bridge-mode";
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const agent = new AgentOverChromeBridge({
// uncomment this to close the new tab when destroying the agent
// closeNewTabsAfterDisconnect: true,
});
// This will connect to a new tab on your desktop Chrome
// remember to open the Chrome extension and click the 'Allow connection' button.
await agent.connectNewTabWithUrl("https://www.bing.com");
// Once connected, you will see this log; otherwise you will get a timeout error.
console.log("connected to a new tab!");
// these are the same as normal Midscene agent
await agent.aiAction('type "AI 101" and hit Enter');
await sleep(3000);
await agent.aiAssert("there are some search results");
await agent.destroy();
})()
);
{
"name": "bridge-mode-demo",
"private": true,
"version": "1.0.0",
"description": "> quick start",
"main": "index.js",
"type": "module",
"scripts": {
"demo-new-tab": "npx tsx demo-new-tab.ts",
"demo-current-tab": "npx tsx demo-current-tab.ts",
"demo-yaml-new-tab": "npx midscene yaml-new-tab.yml"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/cli": "latest",
"@midscene/web": "latest",
"dotenv": "^16.4.5",
"tsx": "4.20.1"
}
}
\ No newline at end of file
# login to sauce demo, extract the items info into a json file, and assert the price of 'Sauce Labs Fleece Jacket'
target:
url: https://www.saucedemo.com/
output: ./output/sauce-demo-items.json
bridgeMode: newTabWithUrl
tasks:
- name: login
flow:
- aiAction: type 'standard_user' in user name input, type 'secret_sauce' in password, click 'Login'
- name: extract items info
flow:
- aiQuery: >
{name: string, price: number, actionBtnName: string}[], return item name, price and the action button name on the lower right corner of each item (like 'Remove')
name: items
- aiAssert: The price of 'Sauce Labs Fleece Jacket' is 49.99
package-lock.json
.env
.env.*
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
midscene_run/cache
# Connectivity Test
Use this folder to test the connectivity of the LLM Service.
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Run
```bash
npm install
npm run test
```
# FAQ
* How to resolve `"OPENAI_BASE_URL" is already defined and was NOT overwritten`?
Remove the `OPENAI_BASE_URL` from your system environment variables so that the `.env` file can take effect.
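Alternatively, if changing the system environment is inconvenient, dotenv can be told to overwrite existing variables; a minimal sketch, assuming dotenv v16 (which supports the `override` option):

```typescript
// By default dotenv keeps variables already defined in the shell environment;
// `override: true` makes values from .env win instead.
import dotenv from "dotenv";

dotenv.config({ override: true });
console.log(process.env.OPENAI_BASE_URL); // now read from .env
```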
# Reference
https://midscenejs.com/model-provider.html
\ No newline at end of file
{
"name": "puppeteer-demo",
"private": true,
"version": "1.0.0",
"main": "index.js",
"type": "module",
"scripts": {
"test": "vitest ./tests/connectivity.test.ts"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@azure/identity": "4.5.0",
"@midscene/core": "latest",
"@midscene/shared": "latest",
"dotenv": "^16.4.5",
"openai": "4.57.1",
"puppeteer": "^23.4.0",
"tsx": "4.20.1",
"vitest": "^2.1.8"
}
}
\ No newline at end of file
import { describe, it, expect, vi } from "vitest";
import dotenv from "dotenv";
import OpenAI, { AzureOpenAI } from "openai";
import { join } from "node:path";
import { localImg2Base64 } from "@midscene/shared/img";
import { callToGetJSONObject } from "@midscene/core/ai-model";
import {
DefaultAzureCredential,
getBearerTokenProvider,
} from "@azure/identity";
// read and parse .env file
const result = dotenv.config({
debug: true,
});
if (result.error) {
throw result.error;
}
// uncomment to see the parsed result. It may include some credentials.
// console.log(".env file parsed result");
// console.log(result.parsed);
vi.setConfig({
testTimeout: 30000,
});
const imagePath = join(__dirname, "some_logo.png");
const imageBase64 = localImg2Base64(imagePath);
const model = process.env.MIDSCENE_MODEL_NAME || "gpt-4o";
describe("Use OpenAI SDK directly", () => {
it(`basic call with ${model}`, async () => {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
baseURL: process.env.OPENAI_BASE_URL,
});
const response = await openai.chat.completions.create({
model: model,
messages: [{ role: "user", content: "Hello, how are you?" }],
});
// console.log(response.choices[0].message.content);
expect(response.choices[0].message.content).toBeTruthy();
});
it(`image input with ${model}`, async () => {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
baseURL: process.env.OPENAI_BASE_URL,
});
const response = await openai.chat.completions.create({
model: model,
messages: [
{ role: "user", content: "Tell me what is in this image" },
{
role: "user",
content: [
{
type: "image_url",
image_url: {
url: imageBase64,
},
},
],
},
],
});
console.log(response.choices[0].message.content);
expect(response.choices[0].message.content).toBeTruthy();
});
});
describe("Use Midscene wrapped OpenAI SDK", () => {
it("call to get json object", async () => {
const result = await callToGetJSONObject<{ content: string }>(
[
{
role: "user",
content:
"What is the content of this image? return in json format {content: string}",
},
{
role: "user",
content: [
{
type: "image_url",
image_url: {
url: imageBase64,
},
},
],
},
],
2 /* AIActionType.EXTRACT_DATA */
);
console.log(result.content.content);
expect(result.content.content.length).toBeGreaterThan(5);
});
});
// remove the ".skip" if you want to test Azure OpenAI Service
describe.skip("Azure OpenAI Service by ADT Credential", () => {
it("basic call", async () => {
// sample code: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/openai/openai/samples/cookbook/simpleCompletionsPage/app.js
const scope = process.env.MIDSCENE_AZURE_OPENAI_SCOPE;
if (typeof scope !== "string") {
throw new Error("MIDSCENE_AZURE_OPENAI_SCOPE is required");
}
const credential = new DefaultAzureCredential();
const tokenProvider = getBearerTokenProvider(credential, scope);
const extraAzureConfig = JSON.parse(
process.env.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON || "{}"
);
// console.log(extraAzureConfig);
const openai = new AzureOpenAI({
azureADTokenProvider: tokenProvider,
...extraAzureConfig,
});
const response = await openai.chat.completions.create({
model: model,
messages: [{ role: "user", content: "Hello, how are you?" }],
});
expect(response.choices[0].message.content).toBeTruthy();
});
});
package-lock.json
.env
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
midscene_run/cache
# Playwright Demo
This demo shows how to use Playwright with Midscene for automation tasks.
If you want to use Vitest instead of the Playwright test runner, please refer to [puppeteer-with-vitest-demo](../puppeteer-with-vitest-demo) for the usage pattern.
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Run demo
```bash
npm install
# run demo.ts
npx tsx demo.ts
# run extract-data.ts
npx tsx extract-data.ts
# run demo with a `.runYaml` call
npx tsx demo-run-yaml.ts
```
# Reference
https://midscenejs.com/integrate-with-playwright.html
https://midscenejs.com/api.html
import { chromium } from "playwright";
import { PlaywrightAgent } from "@midscene/web/playwright";
import "dotenv/config";
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const browser = await chromium.launch({
headless: true, // 'true' means we can't see the browser window
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setViewportSize({
width: 1280,
height: 800,
});
await page.goto("https://www.ebay.com");
await sleep(5000);
const agent = new PlaywrightAgent(page);
// 👀 run YAML with agent
const { result } = await agent.runYaml(`
tasks:
- name: search
flow:
- ai: input 'Headphones' in search box, click search button
- sleep: 3000
- name: query
flow:
- aiQuery: "{itemTitle: string, price: Number}[], find item in list and corresponding price"
name: headphones
- aiNumber: "What is the price of the first headphone?"
- aiBoolean: "Is the price of the headphones more than 1000?"
- aiString: "What is the name of the first headphone?"
- aiLocate: "What is the location of the first headphone?"
`);
console.log(result);
await browser.close();
})()
);
import { chromium } from "playwright";
import { PlaywrightAgent } from "@midscene/web/playwright";
import "dotenv/config"; // read environment variables from .env file
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const browser = await chromium.launch({
headless: true, // 'true' means we can't see the browser window
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setViewportSize({
width: 1280,
height: 768,
});
await page.goto("https://www.ebay.com");
await sleep(5000);
// 👀 init Midscene agent
const agent = new PlaywrightAgent(page);
// 👀 type keywords, perform a search
await agent.aiAction('type "Headphones" in search box, hit Enter');
// 👀 wait for the loading
await agent.aiWaitFor("there is at least one headphone item on page");
// or you may use a plain sleep:
// await sleep(5000);
// 👀 understand the page content, find the items
const items = await agent.aiQuery(
"{itemTitle: string, price: Number}[], find item in list and corresponding price"
);
console.log("headphones in stock", items);
const isMoreThan1000 = await agent.aiBoolean("Is the price of the headphones more than 1000?");
console.log("isMoreThan1000", isMoreThan1000);
const price = await agent.aiNumber("What is the price of the first headphone?");
console.log("price", price);
const name = await agent.aiString("What is the name of the first headphone?");
console.log("name", name);
const location = await agent.aiLocate("What is the location of the first headphone?");
console.log("location", location);
// 👀 assert by AI
await agent.aiAssert("There is a category filter on the left");
// 👀 click on the first item
await agent.aiTap("the first item in the list");
await browser.close();
})()
);
import { chromium } from "playwright";
import { PlaywrightAgent } from "@midscene/web/playwright";
import "dotenv/config"; // read environment variables from .env file
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const browser = await chromium.launch({
headless: false, // set to 'false' to see the browser window for demo
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setViewportSize({
width: 1280,
height: 768,
});
// Load the contacts demo page (replace with your actual file path or URL)
await page.goto("https://lf3-static.bytednsdoc.com/obj/eden-cn/nupipfups/Midscene/contacts3.html");
// await sleep(2000);
// 🤖 Initialize Midscene agent
const agent = new PlaywrightAgent(page);
console.log("🚀 Starting Smart Contacts Demo with Midscene AI");
console.log("================================================");
// ✨ FEATURE DEMO 1: aiRightClick - Right-click on a contact
console.log("\n1. 🖱️ Testing aiRightClick feature...");
await agent.aiRightClick("Alice Johnson", { deepThink: true });
await sleep(1000);
console.log("✅ Successfully right-clicked on Alice Johnson's contact card");
// Click on "Copy Info" option in context menu
await agent.aiTap("Copy Info");
await sleep(1000);
console.log("✅ Successfully triggered 'Copy Info' action from context menu");
// ✨ FEATURE DEMO 2: aiQuery with domIncluded - Extract contact data including hidden attributes
console.log("\n2. 📊 Testing aiQuery with domIncluded feature...");
const contactsData = await agent.aiQuery(
"{name: string, id: number, company: string, department: string, avatarUrl: string}[], extract all contact information including hidden avatarUrl attributes",
{ domIncluded: true }
);
console.log("✅ Successfully extracted contact data with hidden attributes:");
console.log(JSON.stringify(contactsData, null, 2));
// ✨ FEATURE DEMO 3: aiBoolean with domIncluded - Check for ID fields
console.log("\n3. ❓ Testing aiBoolean with domIncluded feature...");
const isId1 = await agent.aiBoolean(
"Is the first contact's id 1?",
{ domIncluded: true }
);
console.log("✅ Is the first contact's id 1?", isId1);
// ✨ FEATURE DEMO 4: aiNumber with domIncluded - Get the first contact's id
console.log("\n4. 🔢 Testing aiNumber with domIncluded feature...");
const firstContactId = await agent.aiNumber("First contact's id?", { domIncluded: true });
console.log("✅ First contact's id:", firstContactId);
// ✨ FEATURE DEMO 5: aiString with domIncluded - Get the first contact's avatar URL
console.log("\n5. 🆔 Testing aiString with domIncluded feature...");
const avatarUrl = await agent.aiString(
"What is the Avatar URL of the first contact?",
{ domIncluded: true }
);
console.log("✅ First contact's Avatar URL:", avatarUrl);
console.log("\n🎉 Smart Contacts Demo completed!");
console.log("================================================");
console.log("✨ Midscene features demonstrated:");
console.log(" • aiRightClick() with deepThink - Custom context menus");
console.log(" • aiQuery() with domIncluded - Extract hidden ID attributes");
console.log(" • aiBoolean() with domIncluded - DOM-based boolean checks");
console.log(" • aiNumber() - with domIncluded - Hidden ID attributes");
console.log(" • aiString() with domIncluded - Extract hidden Avatar URL values");
// Keep browser open for a few seconds to see the results
await sleep(3000);
await browser.close();
})()
);
\ No newline at end of file
{
"name": "playwright-demo",
"private": true,
"version": "1.0.0",
"description": "> quick start",
"main": "index.js",
"type": "module",
"scripts": {
"test": "tsx demo.ts",
"test-yaml": "tsx demo-run-yaml.ts"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/web": "latest",
"@playwright/test": "^1.54.1",
"dotenv": "^16.4.5",
"playwright": "1.54.1",
"tsx": "4.20.1"
}
}
\ No newline at end of file
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
package-lock.json
\ No newline at end of file
registry=https://registry.npmjs.org/
strict-peer-dependencies=false
save-prefix=''
save-workspace-protocol=rolling
ignore-compatibility-db=true
use-lockfile-v6=true
puppeteer_download_base_url=https://cdn.npmmirror.com/binaries/chrome-for-testing
# playwright-testing-demo
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Run demo
run e2e test
```bash
pnpm install
# run e2e test
pnpm run e2e
# prefer using cache
pnpm run e2e:cache
# run e2e with playwright ui, remember to click the little "Play" button in the upper-left corner
pnpm run e2e:ui
# run e2e with playwright ui + cache
pnpm run e2e:ui:cache
```
After the above command executes successfully, the console will output: `Midscene - report file updated: ./current_cwd/midscene_run/report/some_id.html.` You can open this file in a browser to view the report.
# Reference
https://midscenejs.com/integrate-with-playwright.html
https://midscenejs.com/api.html
import { expect } from "@playwright/test";
import { test } from "./fixture";
test.beforeEach(async ({ page }) => {
await page.setViewportSize({ width: 1280, height: 768 });
await page.goto("https://www.ebay.com");
await page.waitForLoadState("networkidle");
});
test("search headphone on ebay", async ({
ai,
aiQuery,
aiAssert,
aiWaitFor,
aiNumber,
aiBoolean,
aiString,
aiLocate,
}) => {
// 👀 type keywords, perform a search
await ai('type "Headphones" in search box, hit Enter');
// 👀 wait for the loading
await aiWaitFor("there is at least one headphone item on page");
// 👀 find the items
const items = await aiQuery(
"{itemTitle: string, price: Number}[], find item in list and corresponding price"
);
const isMoreThan1000 = await aiBoolean("Is the price of the headphones more than 1000?");
console.log("isMoreThan1000", isMoreThan1000);
console.log("headphones in stock", items);
expect(items?.length).toBeGreaterThan(0);
const price = await aiNumber("What is the price of the first headphone?");
console.log("price", price);
const name = await aiString("What is the name of the first headphone?");
console.log("name", name);
const location = await aiLocate("What is the location of the first headphone?");
console.log("location", location);
// 👀 assert by AI
await aiAssert("There is a category filter on the left");
});
import { test as base } from "@playwright/test";
import type { PlayWrightAiFixtureType } from "@midscene/web/playwright";
import { PlaywrightAiFixture } from "@midscene/web/playwright";
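// Extend Playwright's base test with Midscene fixtures (ai, aiQuery, aiAssert, aiWaitFor, aiTap, ...)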
export const test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture());
import { test } from "./fixture";
test.beforeEach(async ({ page }) => {
await page.setViewportSize({ width: 400, height: 905 });
await page.goto("https://heyteavivocity.meuu.online/home");
await page.waitForLoadState("networkidle");
});
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
test("ai online order", async ({ page, ai, aiAssert }) => {
await ai("点击左上角语言切换按钮(English、中文),在弹出的下拉列表中点击中文");
await ai("向下滚动一屏");
await sleep(2000);
await ai("直接点击多肉葡萄的规格按钮");
await ai("点击不使用吸管、点击冰沙推荐、点击正常冰推荐");
await ai("向下滚动一屏");
await sleep(2000);
await ai("点击标准甜、点击绿妍(推荐)、点击标准口味");
await aiAssert("页面上没有『选好了』的按钮");
await ai("滚动到最下面");
await aiAssert("页面上有『选好了』的按钮");
await ai("点击选好了按钮");
});
import { expect } from "@playwright/test";
import { test } from "./fixture";
test.beforeEach(async ({ page }) => {
await page.goto("https://todomvc.com/examples/react/dist/");
});
test("ai todo - English Prompt", async ({ ai, aiQuery, aiAssert, aiTap }) => {
// .ai - general AI operation method
await ai(
"Input 'Learn JS today' in the task box input and press the Enter key"
);
await ai(
"Input 'Learn Rust tomorrow' in the task box input and press the Enter key"
);
await ai(
"Input 'Learn AI the day after tomorrow' in the task box input and press the Enter key"
);
await ai(
"Move the mouse to the second item in the task list and click the delete button on the right of the second task"
);
// .aiTap - specify the operation type
await aiTap("the check button on the left of the second task");
await aiTap("the 'completed' status button below the task list");
const list = await aiQuery("string[], the complete task list");
expect(list.length).toEqual(1);
await aiAssert(
'Near the bottom of the list, there is a tip shows "1 item left".'
);
});
import { test } from "./fixture";
test.beforeEach(async ({ page }) => {
await page.goto("https://todomvc.com/examples/react/dist/");
});
test("ai todo - Chinese Prompt", async ({ ai, aiQuery, aiAssert, aiTap, aiHover }) => {
// .ai - 通用 AI 操作方法
await ai("在任务框 input 输入 今天学习 JS,按回车键");
await ai("在任务框 input 输入 明天学习 Rust,按回车键");
await ai("在任务框 input 输入后天学习 AI,按回车键");
// .aiTap, .aiHover - 即时操作接口
await aiHover('任务列表中的第二项');
await aiTap("第二项任务右边的删除按钮");
await aiTap("第二条任务左边的勾选按钮");
await aiTap("任务列表下面的 completed 状态按钮");
await aiAssert('列表下方有一个区域显示有 "1 item left"');
});
midsceneVersion: 0.24.2-beta-20250731030716.0
cacheId: ebay-search.spec.ts(search-headphone-on-ebay)
caches:
- type: plan
prompt: type "Headphones" in search box, hit Enter
yamlWorkflow: |
tasks:
- name: type "Headphones" in search box, hit Enter
flow:
- aiInput: Headphones
locate: eBay main search input box
- sleep: 500
- aiKeyboardPress: Enter
- type: locate
prompt: eBay main search input box
xpaths:
- >-
/html/body/div[4]/div[1]/div[1]/div[1]/div[1]/header[1]/section[1]/form[1]/div[1]/div[1]/div[1]/input[1]
- type: locate
prompt: What is the location of the first headphone?
xpaths:
- >-
/html/body/div[5]/div[4]/div[3]/div[1]/div[3]/ul[1]/li[1]/div[1]/div[2]/div[4]/div[1]/div[4]/span[1]
midsceneVersion: 0.24.2-beta-20250731030716.0
cacheId: online-order-zh.spec.ts(ai-online-order)
caches:
- type: plan
prompt: 点击左上角语言切换按钮(English、中文),在弹出的下拉列表中点击中文
yamlWorkflow: |
tasks:
- name: 点击左上角语言切换按钮(English、中文),在弹出的下拉列表中点击中文
flow:
- aiTap: Top left language switch button displaying 'English'
- sleep: 500
- aiTap: 语言切换下拉列表中的‘中文’选项
- type: plan
prompt: 向下滚动一屏
yamlWorkflow: |
tasks:
- name: 向下滚动一屏
flow:
- aiScroll: null
direction: down
scrollType: once
distance: null
- sleep: 500
- type: plan
prompt: 直接点击多肉葡萄的规格按钮
yamlWorkflow: |
tasks:
- name: 直接点击多肉葡萄的规格按钮
flow:
- aiTap: 多肉葡萄(首创)对应的选规格按钮
- sleep: 500
- type: plan
prompt: 点击不使用吸管、点击冰沙推荐、点击正常冰推荐
yamlWorkflow: |
tasks:
- name: 点击不使用吸管、点击冰沙推荐、点击正常冰推荐
flow:
- aiTap: “不使用吸管”选项的复选框
- sleep: 300
- aiTap: 冰沙(推荐)的复选框
- aiTap: 正常冰(推荐)的复选框
- type: plan
prompt: 向下滚动一屏
yamlWorkflow: |
tasks:
- name: 向下滚动一屏
flow:
- aiScroll: null
direction: down
scrollType: once
distance: null
- sleep: 500
- type: plan
prompt: 点击标准甜、点击绿妍(推荐)、点击标准口味
yamlWorkflow: |
tasks:
- name: 点击标准甜、点击绿妍(推荐)、点击标准口味
flow:
- aiTap: 标准甜(推荐)复选框
- sleep: 200
- aiTap: 绿妍(推荐)的复选框
- sleep: 100
- aiTap: The '标准口味(推荐)' checkbox under 口味 section
- type: plan
prompt: 滚动到最下面
yamlWorkflow: |
tasks:
- name: 滚动到最下面
flow:
- aiScroll: null
direction: down
scrollType: untilBottom
distance: null
- type: plan
prompt: 点击选好了按钮
yamlWorkflow: |
tasks:
- name: 点击选好了按钮
flow:
- aiTap: 页面底部的黄色“选好了”按钮
- type: locate
prompt: Top left language switch button displaying 'English'
xpaths:
- >-
/html/body/div[1]/div[1]/div[1]/header[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/section[1]/span[1]/span[normalize-space()="English"]
- type: locate
prompt: 语言切换下拉列表中的‘中文’选项
xpaths:
- /html/body/div[3]/div[1]/div[1]/ul[1]/li[2]/span[normalize-space()="中文"]
- type: locate
prompt: 多肉葡萄(首创)对应的选规格按钮
xpaths:
- >-
/html/body/div[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[2]/div[1]/div[3]/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div[normalize-space()="选规格"]
- type: locate
prompt: “不使用吸管”选项的复选框
xpaths:
- >-
/html/body/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/section[1]/section[2]/div[2]/div[1]/div[2]/div[1]/div[2]/label[1]/span[1]/input[1]
- type: locate
prompt: 冰沙(推荐)的复选框
xpaths:
- >-
/html/body/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/section[1]/section[2]/div[2]/div[2]/div[2]/div[1]/div[1]/label[1]/span[1]/input[1]
- type: locate
prompt: 正常冰(推荐)的复选框
xpaths:
- >-
/html/body/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/section[1]/section[2]/div[2]/div[3]/div[2]/div[1]/div[1]/label[1]/span[1]/input[1]
- type: locate
prompt: 绿妍(推荐)的复选框
xpaths:
- >-
/html/body/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/section[1]/section[2]/div[2]/div[5]/div[2]/div[1]/div[1]/label[1]/span[1]/input[1]
- type: locate
prompt: The '标准口味(推荐)' checkbox under 口味 section
xpaths:
- >-
/html/body/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/section[1]/section[2]/div[2]/div[6]/div[2]/div[1]/div[1]/label[1]/span[1]/input[1]
- type: locate
prompt: 页面底部的黄色“选好了”按钮
xpaths:
- >-
/html/body/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/section[2]/div[1]/div[3]/button[normalize-space()="选好了"]
midsceneVersion: 0.24.2-beta-20250731030716.0
cacheId: todo-mvc-en.spec.ts(ai-todo---English-Prompt)
caches:
- type: plan
prompt: Input 'Learn JS today' in the task box input and press the Enter key
yamlWorkflow: |
tasks:
- name: Input 'Learn JS today' in the task box input and press the Enter key
flow:
- aiInput: Learn JS today
locate: 任务输入框
- aiKeyboardPress: Enter
- type: plan
prompt: Input 'Learn Rust tomorrow' in the task box input and press the Enter key
yamlWorkflow: |
tasks:
- name: Input 'Learn Rust tomorrow' in the task box input and press the Enter key
flow:
- aiInput: Learn Rust tomorrow
locate: Task input box with placeholder 'What needs to be done?'
- aiKeyboardPress: Enter
- type: plan
prompt: >-
Input 'Learn AI the day after tomorrow' in the task box input and press
the Enter key
yamlWorkflow: |
tasks:
- name: >-
Input 'Learn AI the day after tomorrow' in the task box input and press
the Enter key
flow:
- aiInput: Learn AI the day after tomorrow
locate: The task input box with placeholder 'What needs to be done?'
- aiKeyboardPress: Enter
- type: plan
prompt: >-
Move the mouse to the second item in the task list and click the delete
button on the right of the second task
yamlWorkflow: |
tasks:
- name: >-
Move the mouse to the second item in the task list and click the delete
button on the right of the second task
flow:
- aiTap: Delete button on the right of the second task 'Learn Rust tomorrow'
- type: locate
prompt: 任务输入框
xpaths:
- /html/body/section[1]/header[1]/div[1]/input[1]
- type: locate
prompt: Task input box with placeholder 'What needs to be done?'
xpaths:
- /html/body/section[1]/header[1]/div[1]/input[1]
- type: locate
prompt: The task input box with placeholder 'What needs to be done?'
xpaths:
- /html/body/section[1]/header[1]/div[1]/input[1]
- type: locate
prompt: Delete button on the right of the second task 'Learn Rust tomorrow'
xpaths:
- >-
/html/body/section[1]/main[1]/ul[1]/li[2]/div[1]/label[normalize-space()="Learn
Rust tomorrow"]
- type: locate
prompt: the check button on the left of the second task
xpaths:
- /html/body/section[1]/main[1]/ul[1]/li[2]/div[1]/input[1]
- type: locate
prompt: the 'completed' status button below the task list
xpaths:
- >-
/html/body/section[1]/footer[1]/ul[1]/li[3]/a[normalize-space()="Completed"]
midsceneVersion: 0.24.2-beta-20250731030716.0
cacheId: todo-mvc-zh.spec.ts(ai-todo---Chinese-Prompt)
caches:
- type: plan
prompt: 在任务框 input 输入 今天学习 JS,按回车键
yamlWorkflow: |
tasks:
- name: 在任务框 input 输入 今天学习 JS,按回车键
flow:
- aiInput: 今天学习 JS
locate: 显示“What needs to be done?”的任务输入框
- aiKeyboardPress: Enter
- type: plan
prompt: 在任务框 input 输入 明天学习 Rust,按回车键
yamlWorkflow: |
tasks:
- name: 在任务框 input 输入 明天学习 Rust,按回车键
flow:
- aiInput: 明天学习 Rust
locate: 显示“What needs to be done?”的任务输入框
- aiKeyboardPress: Enter
- type: plan
prompt: 在任务框 input 输入后天学习 AI,按回车键
yamlWorkflow: |
tasks:
- name: 在任务框 input 输入后天学习 AI,按回车键
flow:
- aiInput: 后天学习 AI
locate: 显示“What needs to be done?”的任务输入框
- aiKeyboardPress: Enter
- type: locate
prompt: 显示“What needs to be done?”的任务输入框
xpaths:
- /html/body/section[1]/header[1]/div[1]/input[1]
- type: locate
prompt: 显示“What needs to be done?”的任务输入框
xpaths:
- /html/body/section[1]/header[1]/div[1]/input[1]
- type: locate
prompt: 显示“What needs to be done?”的任务输入框
xpaths:
- /html/body/section[1]/header[1]/div[1]/input[1]
- type: locate
prompt: 任务列表中的第二项
xpaths:
- /html/body/section[1]/main[1]/ul[1]/li[2]/div[1]/label[1]
- type: locate
prompt: 第二项任务右边的删除按钮
xpaths:
- /html/body/section[1]/main[1]/ul[1]/li[2]/div[1]/button[1]
- type: locate
prompt: 第二条任务左边的勾选按钮
xpaths:
- /html/body/section[1]/main[1]/ul[1]/li[2]/div[1]/input[1]
- type: locate
prompt: 任务列表下面的 completed 状态按钮
xpaths:
- >-
/html/body/section[1]/footer[1]/ul[1]/li[3]/a[normalize-space()="Completed"]
{
"name": "playwright-testing-demo",
"private": true,
"version": "0.0.1",
"type": "module",
"scripts": {
"e2e": "playwright test --config=playwright.config.ts",
"e2e:cache": "cross-env MIDSCENE_CACHE=true playwright test --config=playwright.config.ts",
"e2e:ui": "playwright test --config=playwright.config.ts --ui",
"e2e:ui:cache": "cross-env MIDSCENE_CACHE=true playwright test --config=playwright.config.ts --ui",
"postinstall": "pnpm exec playwright install"
},
"devDependencies": {
"@midscene/web": "latest",
"@playwright/test": "1.52.0",
"@types/jest": "~29.5.14",
"@types/node": "~22.7.9",
"cross-env": "7.0.3",
"dotenv": "16.4.5",
"eslint-plugin-prettier": "~5.2.1",
"rimraf": "~6.0.1",
"tsx": "4.20.3",
"typescript": "~5.6.3"
},
"publishConfig": {
"access": "public"
}
}
import { defineConfig, devices } from "@playwright/test";
import dotenv from "dotenv";
/**
* Read environment variables from file.
* https://github.com/motdotla/dotenv
*/
dotenv.config();
/**
* See https://playwright.dev/docs/test-configuration.
*/
export default defineConfig({
testDir: "./e2e",
testMatch: "**/*.spec.ts",
timeout: 10 * 60 * 1000,
/* Run tests in files in parallel */
fullyParallel: false,
/* Fail the build on CI if you accidentally left test.only in the source code. */
forbidOnly: Boolean(process.env.CI),
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Opt out of parallel tests on CI. */
workers: process.env.CI ? 1 : undefined,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: [
[process.env.CI ? "line" : "list"],
["@midscene/web/playwright-reporter"],
],
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
use: {
/* Base URL to use in actions like `await page.goto('/')`. */
// baseURL: 'http://127.0.0.1:3000',
/* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
trace: "on-first-retry",
},
/* Configure projects for major browsers */
projects: [
{
name: "chromium",
use: { ...devices["Desktop Chrome"] },
},
],
});
{
"compilerOptions": {
"baseUrl": ".",
"declaration": true,
"emitDeclarationOnly": true,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"isolatedModules": true,
"jsx": "preserve",
"lib": ["DOM", "ESNext"],
"moduleResolution": "node",
"paths": {
"@/*": ["./src/*"]
},
"target": "ES2017",
"resolveJsonModule": true,
"rootDir": "./",
"skipLibCheck": true,
"strict": true
},
"exclude": [ "node_modules"],
"include": ["src", "tests", "./playwright.config.ts", "./vitest.config"]
}
package-lock.json
.env
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
midscene_run/cache
# Puppeteer Demo
This demo shows how to use Puppeteer with Midscene for automation tasks.
If you want to use Puppeteer with Vitest, please refer to [puppeteer-with-vitest-demo](../puppeteer-with-vitest-demo).
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Run demo
```bash
npm install
# run demo.ts
npx tsx demo.ts
# run extract-data.ts
npx tsx extract-data.ts
# run demo with a `.runYaml` call
npx tsx demo-run-yaml.ts
```
# Reference
https://midscenejs.com/integrate-with-puppeteer.html
https://midscenejs.com/api.html
import puppeteer from "puppeteer";
import os from "node:os";
import { PuppeteerAgent } from "@midscene/web/puppeteer";
import "dotenv/config";
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const browser = await puppeteer.launch({
headless: true, // 'true' means we can't see the browser window
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800,
deviceScaleFactor: os.platform() === "darwin" ? 2 : 1, // this is used to avoid flashing on UI Mode when doing screenshot on Mac
});
await page.goto("https://www.ebay.com");
await sleep(5000);
const agent = new PuppeteerAgent(page);
// 👀 run YAML with agent
const { result } = await agent.runYaml(`
tasks:
- name: search
flow:
- ai: input 'Headphones' in search box, click search button
- sleep: 3000
- name: query
flow:
- aiQuery: "{itemTitle: string, price: Number}[], find item in list and corresponding price"
name: headphones
- aiNumber: "What is the price of the first headphone?"
- aiBoolean: "Is the price of the headphones more than 1000?"
- aiString: "What is the name of the first headphone?"
- aiLocate: "What is the location of the first headphone?"
`);
console.log(result);
await browser.close();
})()
);
import puppeteer from "puppeteer";
import os from "node:os";
import { PuppeteerAgent } from "@midscene/web/puppeteer";
import "dotenv/config"; // read environment variables from .env file
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const browser = await puppeteer.launch({
headless: true, // 'true' means we can't see the browser window
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 768,
deviceScaleFactor: os.platform() === "darwin" ? 2 : 1, // this is used to avoid flashing on UI Mode when doing screenshot on Mac
});
await page.goto("https://www.ebay.com");
await sleep(5000);
// 👀 init Midscene agent
const agent = new PuppeteerAgent(page);
// 👀 type keywords, perform a search
await agent.aiAction('type "Headphones" in search box, hit Enter');
// 👀 wait for the loading
await agent.aiWaitFor("there is at least one headphone item on page");
// or you may use a plain sleep:
// await sleep(5000);
// 👀 understand the page content, find the items
const items = await agent.aiQuery(
"{itemTitle: string, price: Number}[], find item in list and corresponding price"
);
console.log("headphones in stock", items);
const isMoreThan1000 = await agent.aiBoolean("Is the price of the headphones more than 1000?");
console.log("isMoreThan1000", isMoreThan1000);
const price = await agent.aiNumber("What is the price of the first headphone?");
console.log("price", price);
const name = await agent.aiString("What is the name of the first headphone?");
console.log("name", name);
const location = await agent.aiLocate("What is the location of the first headphone?");
console.log("location", location);
// 👀 assert by AI
await agent.aiAssert("There is a category filter on the left");
// 👀 click on the first item
await agent.aiTap("the first item in the list");
await browser.close();
})()
);
import puppeteer from "puppeteer";
import os from "node:os";
import { PuppeteerAgent } from "@midscene/web/puppeteer";
import "dotenv/config"; // read environment variables from .env file
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
Promise.resolve(
(async () => {
const browser = await puppeteer.launch({
headless: false, // set to 'false' to see the browser window for demo
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 768,
deviceScaleFactor: os.platform() === "darwin" ? 2 : 1,
});
// Load the contacts demo page (replace with your actual file path or URL)
await page.goto("https://lf3-static.bytednsdoc.com/obj/eden-cn/nupipfups/Midscene/contacts3.html");
// await sleep(2000);
// 🤖 Initialize Midscene agent
const agent = new PuppeteerAgent(page);
console.log("🚀 Starting Smart Contacts Demo with Midscene AI");
console.log("================================================");
// ✨ FEATURE DEMO 1: aiRightClick - Right-click on a contact
console.log("\n1. 🖱️ Testing aiRightClick feature...");
await agent.aiRightClick("Alice Johnson", { deepThink: true });
await sleep(1000);
console.log("✅ Successfully right-clicked on Alice Johnson's contact card");
// Click on "Copy Info" option in context menu
await agent.aiTap("Copy Info");
await sleep(1000);
console.log("✅ Successfully triggered 'Copy Info' action from context menu");
// ✨ FEATURE DEMO 2: aiQuery with domIncluded - Extract contact data including hidden attributes
console.log("\n2. 📊 Testing aiQuery with domIncluded feature...");
const contactsData = await agent.aiQuery(
"{name: string, id: number, company: string, department: string, avatarUrl: string}[], extract all contact information including hidden avatarUrl attributes",
{ domIncluded: true }
);
console.log("✅ Successfully extracted contact data with hidden attributes:");
console.log(JSON.stringify(contactsData, null, 2));
// ✨ FEATURE DEMO 3: aiBoolean with domIncluded - Check for ID fields
console.log("\n3. ❓ Testing aiBoolean with domIncluded feature...");
const isId1 = await agent.aiBoolean(
"Is the first contact's id 1?",
{ domIncluded: true }
);
console.log("✅ Is the first contact's id 1?", isId1);
// ✨ FEATURE DEMO 4: aiNumber with domIncluded - Get the first contact's id
console.log("\n4. 🔢 Testing aiNumber with domIncluded feature...");
const firstContactId = await agent.aiNumber("First contact's id?", { domIncluded: true });
console.log("✅ First contact's id:", firstContactId);
// ✨ FEATURE DEMO 5: aiString with domIncluded - Get the first contact's avatar URL
console.log("\n5. 🆔 Testing aiString with domIncluded feature...");
const avatarUrl = await agent.aiString(
"What is the Avatar URL of the first contact?",
{ domIncluded: true }
);
console.log("✅ First contact's Avatar URL:", avatarUrl);
console.log("\n🎉 Smart Contacts Demo completed!");
console.log("================================================");
console.log("✨ Midscene features demonstrated:");
console.log(" • aiRightClick() with deepThink - Custom context menus");
console.log(" • aiQuery() with domIncluded - Extract hidden ID attributes");
console.log(" • aiBoolean() with domIncluded - DOM-based boolean checks");
console.log(" • aiNumber() - with domIncluded - Hidden ID attributes");
console.log(" • aiString() with domIncluded - Extract hidden Avatar URL values");
// Keep browser open for a few seconds to see the results
await sleep(3000);
await browser.close();
})()
);
\ No newline at end of file
{
"name": "puppeteer-demo",
"private": true,
"version": "1.0.0",
"description": "> quick start",
"main": "index.js",
"type": "module",
"scripts": {
"test": "tsx demo.ts",
"test-yaml": "tsx demo-run-yaml.ts"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/web": "latest",
"dotenv": "^16.4.5",
"puppeteer": "^24.0.0",
"tsx": "4.20.1"
}
}
\ No newline at end of file
package-lock.json
.env
# Midscene.js dump files
midscene_run/midscene-report
midscene_run/dump-logger
midscene_run
# Puppeteer with Vitest Demo
This demo shows how to use Puppeteer with Vitest to test AI actions and assertions.
## Steps
### Preparation
create `.env` file
```shell
# replace with your gpt-4o api key
OPENAI_API_KEY="YOUR_TOKEN"
```
Refer to this document if you want to use other models like Qwen: https://midscenejs.com/choose-a-model
### Run demo
```bash
npm install
npm run test
```
# Reference
https://midscenejs.com/integrate-with-puppeteer.html
https://midscenejs.com/api.html
{
"name": "puppeteer-with-vitest-demo",
"private": true,
"version": "1.0.0",
"main": "index.js",
"type": "module",
"scripts": {
"test": "vitest ./tests/"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/web": "latest",
"dotenv": "^16.4.5",
"puppeteer": "^24.0.0",
"vitest": "^2.1.8"
}
}
\ No newline at end of file
import { describe, it, expect, vi, beforeAll } from "vitest";
import puppeteer from "puppeteer";
import { PuppeteerAgent } from "@midscene/web/puppeteer";
import "dotenv/config"; // read environment variables from .env file
vi.setConfig({
testTimeout: 240 * 1000,
});
const pageUrl = "https://todomvc.com/examples/react/dist/";
describe("Test todo list", () => {
let agent: PuppeteerAgent;
beforeAll(async () => {
const browser = await puppeteer.launch({
headless: true,
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
const page = await browser.newPage();
await page.goto(pageUrl);
await page.waitForNetworkIdle();
agent = new PuppeteerAgent(page);
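// vitest runs a function returned from beforeAll as the suite's teardown; close the browser there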
return () => {
browser.close();
};
});
it("ai todo", async () => {
await agent.aiAction(
"type 'Study JS today' in the task box input and press the Enter key"
);
await agent.aiAction(
"type 'Study Rust tomorrow' in the task box input and press the Enter key"
);
await agent.aiAction(
"type 'Study AI the day after tomorrow' in the task box input and press the Enter key"
);
await agent.aiAction(
"move the mouse to the second item in the task list and click the delete button on the right of the second task"
);
await agent.aiAction(
"click the check button on the left of the second task"
);
await agent.aiAction(
"click the 'completed' status button below the task list"
);
const list = await agent.aiQuery("string[], the complete task list");
expect(list.length).toEqual(1);
await agent.aiAssert(
'Near the bottom of the list, there is a tip shows "1 item left".'
);
const name = await agent.aiString('What is the name of the first todo?');
console.log('name', name);
const todoCount = await agent.aiNumber('How many todos are there in the list?');
console.log('todoCount', todoCount);
const isAllCompleted = await agent.aiBoolean('Are all todos completed?');
console.log('isAllCompleted', isAllCompleted);
const location = await agent.aiLocate('What is the location of the first todo?');
console.log('location', location);
});
});
# Midscene.js dump files
../midscene_run
node_modules
package-lock.json
.env
\ No newline at end of file
# Example batch execution index YAML file
# This demonstrates how to use the multi-YAML file batch execution feature
# Concurrency settings (default: 1 for sequential execution)
concurrent: 10
# Continue execution even if one file fails (default: false)
continueOnError: true
# Summary output file
summary: "./midscene_run/output/custom-summary.json"
# Global web environment configuration (applied to all files)
web:
# All individual YAML files will inherit these settings
shareBrowserContext: true
viewportWidth: 1280
viewportHeight: 720
# bridgeMode: "newTabWithUrl"
# Output directory for individual files (will be combined with file-specific paths)
# Global android environment configuration (if needed)
# android:
# deviceId: "emulator-5554"
# Execution order using glob patterns
files:
- "midscene-scripts/extract-github-status.yaml"
- "midscene-scripts/bing-search.yaml"
- "midscene-scripts/local-static-server.yml"
- "midscene-scripts/s*.yaml"
{
"name": "ijiwei-yaml",
"private": true,
"version": "1.0.0",
"description": "> quick start",
"main": "index.js",
"type": "module",
"scripts": {
"test": "midscene ./ijiwei-web",
"test:config": "midscene --config config.yml"
},
"author": "",
"license": "MIT",
"devDependencies": {
"@midscene/cli": "^0.26.6"
}
}
web:
# URL under test
url: https://jiweidev.jiweinet.com/
# Browser window size configuration
shareBrowserContext: true
viewportWidth: 2580
viewportHeight: 1320
# Test cases
tasks:
#################################### [Top static pages] ####################################
# - name: 【行业咨询】页面验证不包含404
# flow:
# - ai: 点击 "行业咨询"按钮
# - sleep: 3000
# - aiAssert: 查询结果不包含“404”
# - aiAssert: 详情页面无报错
#
# - name: 【品牌营销】页面验证不包含404
# flow:
# - ai: 点击 "品牌营销"按钮
# - sleep: 3000
# - aiAssert: 查询结果不包含“404”
# - aiAssert: 详情页面无报错
#
# - name: 【集微资讯】页面验证不包含404
# flow:
# - ai: 点击 "集微咨询"按钮
# - sleep: 3000
# - aiAssert: 查询结果不包含“404”
# - aiAssert: 详情页面无报错
#
# - name: 【知识产权】页面验证不包含404
# flow:
# - ai: 点击 "知识产权"按钮
# - sleep: 3000
# - aiAssert: 查询结果不包含“404”
# - aiAssert: 详情页面无报错
#
# - name: 【集微职场】页面验证不包含404
# flow:
# - ai: 点击 "集微职场"按钮
# - sleep: 3000
# - aiAssert: 查询结果不包含“404”
# - aiAssert: 详情页面无报错
#
# - name: 【集微投融资】页面验证不包含404
# flow:
# - ai: 点击 "集微投融资"按钮
# - sleep: 3000
# - aiAssert: 查询结果不包含“404”
# - aiAssert: 详情页面无报错
- name: 【集微企业库】页面验证不包含404
flow:
- ai: 点击 "集微企业库"按钮
- sleep: 1000
- aiAssert: 查询结果不包含“404”
- aiAssert: 详情页面无报错
\ No newline at end of file