Recently, while writing a crawler in Node, I found that the target site had anti-crawling measures: when the request frequency was too high, it simply returned an error response. So we need a concurrency controller that limits the number of simultaneous requests and, ideally, also enforces a time interval between requests.

Go straight to code

/**
 * Concurrency-limited task scheduler.
 *
 * Tasks are zero-argument functions (typically returning a promise) queued
 * with `add()`. `taskStart()` launches up to `limit` tasks at once; each time
 * a task settles, its slot waits `sleepTimeout` milliseconds (only after a
 * successful task) and then picks the next queued task.
 *
 * @param {Object} options
 * @param {Number} options.limit Maximum number of tasks running concurrently.
 * @param {Number} [options.sleepTimeout=0] Interval in milliseconds a slot
 *   waits after a task completes before it takes the next one.
 */
function Scheduler({
  limit,
  sleepTimeout = 0
}) {

  // FIFO queue of pending task functions.
  this.list = [];
  this.maxLimit = limit;
  // Number of slots currently occupied (running a task or sleeping after one).
  this.parallelNum = 0;

  /**
   * Queue a task.
   * @param {Function} fn Zero-argument function; its result is awaited.
   */
  this.add = function (fn) {
    this.list.push(fn);
  };

  /**
   * Start processing the queue by opening up to `maxLimit` slots.
   */
  this.taskStart = function () {
    for (let i = 0; i < this.maxLimit; i++) {
      this.request();
    }
  };

  /**
   * Controls the interval between requests.
   * @param {Number} [timeout=sleepTimeout] Delay in milliseconds.
   * @returns {Promise<void>} Resolves once the delay has elapsed.
   */
  this.sleep = async function (timeout = sleepTimeout) {
    return new Promise((resolve) => setTimeout(resolve, timeout));
  };

  /**
   * Take one task from the queue and run it, then recurse to take the next.
   * Does nothing when the queue is empty or all slots are busy.
   */
  this.request = async function () {
    if (!this.list.length || this.parallelNum >= this.maxLimit) return;

    this.parallelNum++;
    console.log('Current concurrency', this.parallelNum);
    try {
      // Write asynchronous tasks in synchronous form with await
      await this.list.shift()();
      await this.sleep();
    } catch (error) {
      // A failing task is logged and must not stop the rest of the queue.
      console.log('error', error);
    } finally {
      // Free the slot, then recursively pull the next queued task.
      this.parallelNum--;
      this.request();
    }
  };
}
Copy the code

Usage

/**
 * Define a request function: simulates an asynchronous request by resolving
 * with `params` after `timeout` milliseconds.
 *
 * @param {*} params Value to log and to resolve the returned promise with.
 * @param {Number} [timeout=1000] Delay in milliseconds before resolving.
 * @returns {Promise<*>} Promise resolving to `params`.
 */
function fetchFn (params, timeout = 1000) {
  console.log('params', params);
  return new Promise((resolve) => {
    setTimeout(() => {
      resolve(params);
    }, timeout);
  });
}

// Initialization: allow at most 4 requests in flight at once
const scheduler = new Scheduler({
  limit: 4
});

// Each entry describes one simulated request: the value to resolve with
// and how long (in ms) the request takes.
const arr = [
  { params: 1, timeout: 1000 },
  { params: 2, timeout: 700 },
  { params: 3, timeout: 3000 },
  { params: 4, timeout: 100 },
  { params: 5, timeout: 1000 },
  { params: 6, timeout: 400 },
  { params: 7, timeout: 2000 }
];

// Add the requesting function to the scheduler's list
arr.forEach((el) => {
  scheduler.add(() => fetchFn(el.params, el.timeout));
});

// Start executing concurrent tasks
scheduler.taskStart();
Copy the code