From ff97ca1d0362ce54854bb44bded2224ba2c8ab52 Mon Sep 17 00:00:00 2001 From: Mikias Tilahun Abebe Date: Mon, 27 Jan 2025 12:41:57 +0300 Subject: [PATCH 1/3] feat(puppeteer): Add support for connecting to remote browser via WebSocket --- .../src/document_loaders/web/puppeteer.ts | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/puppeteer.ts b/libs/langchain-community/src/document_loaders/web/puppeteer.ts index 280bc1c679a2..7a61be002a36 100644 --- a/libs/langchain-community/src/document_loaders/web/puppeteer.ts +++ b/libs/langchain-community/src/document_loaders/web/puppeteer.ts @@ -4,6 +4,7 @@ import type { Page, Browser, PuppeteerLaunchOptions, + connect, } from "puppeteer"; import { Document } from "@langchain/core/documents"; @@ -52,8 +53,7 @@ export type PuppeteerWebBaseLoaderOptions = { */ export class PuppeteerWebBaseLoader extends BaseDocumentLoader - implements DocumentLoader -{ + implements DocumentLoader { options: PuppeteerWebBaseLoaderOptions | undefined; constructor(public webPath: string, options?: PuppeteerWebBaseLoaderOptions) { @@ -65,14 +65,23 @@ export class PuppeteerWebBaseLoader url: string, options?: PuppeteerWebBaseLoaderOptions ): Promise { - const { launch } = await PuppeteerWebBaseLoader.imports(); + const { launch, connect } = await PuppeteerWebBaseLoader.imports(); - const browser = await launch({ - headless: true, - defaultViewport: null, - ignoreDefaultArgs: ["--disable-extensions"], - ...options?.launchOptions, - }); + let browser: Browser; + + if (options?.launchOptions?.browserWSEndpoint) { + browser = await connect({ + browserWSEndpoint: options?.launchOptions?.browserWSEndpoint, + }); + } + else { + browser = await launch({ + headless: true, + defaultViewport: null, + ignoreDefaultArgs: ["--disable-extensions"], + ...options?.launchOptions, + }); + } const page = await browser.newPage(); await page.goto(url, { @@ -161,12 +170,13 @@ export class PuppeteerWebBaseLoader */ static async imports(): Promise<{ launch: typeof launch; + connect: typeof connect; }> { try { // eslint-disable-next-line import/no-extraneous-dependencies - const { launch } = await import("puppeteer"); + const { launch, connect } = await import("puppeteer"); - return { launch }; + return { launch, connect }; } catch (e) { console.error(e); throw new Error( From 1c7d03fc7e724e393418e405878d4a6606bc9882 Mon Sep 17 00:00:00 2001 From: Mikias Tilahun Abebe Date: Mon, 27 Jan 2025 13:29:55 +0300 Subject: [PATCH 2/3] feat(puppeteer) enable connection to existing browser instance for screenshot functionality --- .../src/document_loaders/web/puppeteer.ts | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/puppeteer.ts b/libs/langchain-community/src/document_loaders/web/puppeteer.ts index 7a61be002a36..7a1429f7457d 100644 --- a/libs/langchain-community/src/document_loaders/web/puppeteer.ts +++ b/libs/langchain-community/src/document_loaders/web/puppeteer.ts @@ -132,14 +132,21 @@ export class PuppeteerWebBaseLoader url: string, options?: PuppeteerWebBaseLoaderOptions ): Promise { - const { launch } = await PuppeteerWebBaseLoader.imports(); + const { launch, connect } = await PuppeteerWebBaseLoader.imports(); - const browser = await launch({ - headless: true, - defaultViewport: null, - ignoreDefaultArgs: ["--disable-extensions"], - ...options?.launchOptions, - }); + let browser: Browser; + if (options?.launchOptions?.browserWSEndpoint) { + browser = await connect({ + browserWSEndpoint: options?.launchOptions?.browserWSEndpoint, + }); + } else { + browser = await launch({ + headless: true, + defaultViewport: null, + ignoreDefaultArgs: ["--disable-extensions"], + ...options?.launchOptions, + }); + } const page = await browser.newPage(); await page.goto(url, { From ef15f41dcec60135500cafacbde08526b1203465 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 29 Jan 2025 16:02:01 -0800 Subject: [PATCH 3/3] Format --- .../src/document_loaders/web/puppeteer.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/langchain-community/src/document_loaders/web/puppeteer.ts b/libs/langchain-community/src/document_loaders/web/puppeteer.ts index 7a1429f7457d..94283700babc 100644 --- a/libs/langchain-community/src/document_loaders/web/puppeteer.ts +++ b/libs/langchain-community/src/document_loaders/web/puppeteer.ts @@ -53,7 +53,8 @@ export type PuppeteerWebBaseLoaderOptions = { */ export class PuppeteerWebBaseLoader extends BaseDocumentLoader - implements DocumentLoader { + implements DocumentLoader +{ options: PuppeteerWebBaseLoaderOptions | undefined; constructor(public webPath: string, options?: PuppeteerWebBaseLoaderOptions) { @@ -73,8 +74,7 @@ export class PuppeteerWebBaseLoader browser = await connect({ browserWSEndpoint: options?.launchOptions?.browserWSEndpoint, }); - } - else { + } else { browser = await launch({ headless: true, defaultViewport: null,