From cdc92acc870915ed3473f8a2c796be048cab6eac Mon Sep 17 00:00:00 2001 From: KonghaYao <3446798488@qq.com> Date: Sat, 7 Aug 2021 21:50:27 +0800 Subject: [PATCH] fix: request --- package/plugins/Request.js | 4 ++-- test/text-bilibili.js | 31 ++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/package/plugins/Request.js b/package/plugins/Request.js index dfa4160..f513e73 100644 --- a/package/plugins/Request.js +++ b/package/plugins/Request.js @@ -43,7 +43,7 @@ function request({ url, options = {} }) { // 获取数据为 request console.log('- 爬取 ', url); - return fetch(url, options) + return fetch(url, Object.assign({ cache: 'force-cache' }, options)) .then((res) => { if (!res.ok) { throw Error(res.statusText); @@ -75,7 +75,7 @@ export function Request(options = {}) { // 通过 this.options 来获取传入的参数,这个参数解析都是由 Plugin 开发者来设置逻辑的 // 所以灵活性很高 - const { delay = 200, buffer = 3, retry = 3, handleError = null } = this.options; + const { delay = 200, buffer = 1, retry = 3, handleError = null } = this.options; return ($source) => $source.pipe( diff --git a/test/text-bilibili.js b/test/text-bilibili.js index 6927832..368b315 100644 --- a/test/text-bilibili.js +++ b/test/text-bilibili.js @@ -1,8 +1,8 @@ -import('https://cdn.jsdelivr.net/npm/js-spider@3.2.1/dist/JSpider.esm.min.js').then(async (res) => { +import('https://cdn.jsdelivr.net/npm/js-spider@3.2.2/dist/JSpider.esm.min.js').then(async (res) => { let JSpider = res.default; let { Plugin, - plugins: { ExcelHelper, Request, Download, ZipFile }, + plugins: { ExcelHelper, Request, Download, ZipFile, Combine }, } = JSpider; await JSpider.$load('xlsx'); await JSpider.$load('jszip'); @@ -14,7 +14,10 @@ import('https://cdn.jsdelivr.net/npm/js-spider@3.2.1/dist/JSpider.esm.min.js').t method: 'GET', }, ).then((res) => res.json()); - let urls = [...Array(first.data.numPages).keys()].map((i) => { + + let number = first.data.numPages; + + let urls = [...Array(number).keys()].map((i) => { return { url: `https://api.bilibili.com/x/web-interface/search/type?context=&page=${ i + 1 @@ -25,14 +28,28 @@ import('https://cdn.jsdelivr.net/npm/js-spider@3.2.1/dist/JSpider.esm.min.js').t }, }; }); - window.Result = []; let spider = new JSpider() .pipeline( - Request(), + Request({ + buffer: 1, + }), + Combine(50, 5000), Plugin((data) => { - data.data.result.forEach((item) => ['hit_columns', 'new_rec_tags'].forEach((ii) => (item[ii] = ''))); - window.Result.push(data.data.result); + return data + .map((i) => { + i.data.result.forEach((item) => { + ['hit_columns', 'new_rec_tags'].forEach((ii) => (item[ii] = '')); + item.pubdate = new Date(Number(item.pubdate + '000')).toLocaleDateString(); + }); + return i.data.result; + }) + .flat() + .flat(); + }), + ExcelHelper((dataset) => { + return { a: dataset }; }), + Download(), ) .crawl(urls) .start();