1. Basic use of scheduled tasks
- Create a schedule folder under the app folder and add a watchfile.js file inside it (the file name can be anything you like):
const Subscription = require('egg').Subscription;

let i = 0;

class WatchFile extends Subscription {
  static get schedule() {
    return {
      interval: '1s', // run every second
      type: 'all',    // all worker processes execute this task
    };
  }

  // subscribe is called each time the task fires
  async subscribe() {
    i++;
    console.log(i);
  }
}

module.exports = WatchFile;
- The console then prints an incrementing count (1, 2, 3, …) every second; with type: 'all', each worker process prints its own count.
- Here is the shorthand form: export a plain object with schedule and task properties instead of a Subscription class
let i = 0;

module.exports = {
  schedule: {
    interval: '1s', // run every second
    type: 'all',    // all worker processes execute this task
  },
  async task(ctx) {
    i++;
    console.log(i);
  },
};
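As an aside, interval plus type: 'all' is only one combination. Egg's schedule config also accepts a cron expression, an immediate flag to fire once at startup, a disable switch, and type: 'worker' to run the task on a single worker. A minimal sketch (the cron expression and log message below are illustrative, not from the original example):

```js
module.exports = {
  schedule: {
    cron: '0 0 3 * * *', // cron expression: fire at 03:00 every day
    type: 'worker',      // only one worker process runs the task
    immediate: true,     // also run once right after the app starts
    // disable: true,    // uncomment to switch the task off
  },
  async task(ctx) {
    ctx.logger.info('nightly task executed');
  },
};
```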
2. Crawling the content of a specified web page on a schedule
- Install the cheerio module
cheerio parses the contents of HTML pages using a jQuery-like syntax.
npm install cheerio
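As a quick sanity check before wiring cheerio into the project, here is a minimal standalone example of its jQuery-like API (the markup is made up for illustration):

```js
const cheerio = require('cheerio');

// load() returns a jQuery-like function bound to the parsed document
const $ = cheerio.load('<h2 class="title">Hello world</h2>');
console.log($('.title').text()); // => Hello world
```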
- Import the module at the top of the scheduled task file
const cheerio = require('cheerio');
- Define a crawler service spider.js under app/service
'use strict';

const Service = require('egg').Service;

class SpiderService extends Service {
  // fetch the raw response for a URL with Egg's built-in HttpClient
  async requestUrl(url) {
    const result = await this.ctx.curl(url);
    return result;
  }
}

module.exports = SpiderService;
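ctx.curl also accepts an options object as its second argument. A variant sketch that asks for a string body and fails fast on slow sites (the 5000 ms timeout is an illustrative choice, not a value from the original article):

```js
'use strict';

const Service = require('egg').Service;

class SpiderService extends Service {
  async requestUrl(url) {
    const result = await this.ctx.curl(url, {
      dataType: 'text', // res.data comes back as a decoded string
      timeout: 5000,    // abort the request after 5 seconds
    });
    return result;
  }
}

module.exports = SpiderService;
```

With dataType set to 'text', result.data is already a string, so the toString() call in the scheduled task below becomes harmless but unnecessary.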
- The scheduled task parses the content fetched by the service
const cheerio = require('cheerio');

module.exports = {
  schedule: {
    interval: '1s', // run every second
    type: 'all',    // all worker processes execute this task
  },
  async task(ctx) {
    const url = 'https://news.baidu.com';
    const result = await ctx.service.spider.requestUrl(url);
    const htmlData = result.data.toString();
    const $ = cheerio.load(htmlData, { decodeEntities: false });
    // print the inner HTML of every hot-news link
    $('.hotnews a').each(function () {
      console.log($(this).html());
    });
  },
};
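Instead of logging raw markup, the same loop can collect structured data. A sketch with a hypothetical extractHotNews helper (the .hotnews a selector comes from the example above and depends on Baidu's current page structure):

```js
const cheerio = require('cheerio');

// Hypothetical helper: turn fetched HTML into an array of headlines.
function extractHotNews(htmlData) {
  const $ = cheerio.load(htmlData, { decodeEntities: false });
  const headlines = [];
  $('.hotnews a').each(function () {
    headlines.push({
      title: $(this).text(),      // visible headline text
      href: $(this).attr('href'), // link target
    });
  });
  return headlines;
}
```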