Preface: I have recently become interested in national population trends and wanted to see how the population is likely to develop in the future. I read a lot of articles, but none of them gave me what I was looking for. In the end I found the National Data website, which has all the data I wanted, so I decided to crawl all of it and draw the charts myself.


Here’s how to crawl data using Node.js

Let’s start by pressing F12 to open the browser's developer tools and look at the interface (API) the site calls.

const axios = require('axios');

/** Request settings shared by every call to the query endpoint. */
const commonCfg = {
  method: 'get',
  url: 'http://data.stats.gov.cn/easyquery.htm',
  headers: {
    // Browser-like User-Agent string sent with each request.
    'User-Agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)',
  },
};

/**
 * Query-string parameters sent with every request. Callers spread this
 * object and add their own `dfwds` entry.
 */
const params = {
  m: 'QueryData',
  dbcode: 'hgnd',
  rowcode: 'zb',
  colcode: 'sj',
  wds: '[]',
  // Current timestamp as a string (presumably a cache-busting value; confirm).
  k1: String(Date.now()),
};

// Total population: male/female, urban/rural
/**
 * Request the figures for one year and hand them to `dealRes`.
 *
 * @param {number|string} year - Year to query; sent as the `sj` value code.
 * @returns {Promise<*>} Whatever `dealRes` returns for the collected values.
 */
async function getTotal(year) {
  const response = await axios({
    ...commonCfg,
    params: {
      ...params,
      // Serialised as JSON so the quoting and brackets are always well formed.
      dfwds: JSON.stringify([{ wdcode: 'sj', valuecode: String(year) }]),
    },
  });
  const { datanodes } = response.data.returndata;
  // Each node carries its figure at `data.data`; keep them in response order.
  const result = datanodes.map((item) => item.data.data);
  return dealRes(result);
}
/**
 * Format a figure: multiply it by 10,000 and round down to a whole number.
 *
 * @param {number} num - Figure to scale (presumably reported in units of
 *   ten thousand; confirm against the data source).
 * @returns {number} `num * 10000`, rounded down.
 */
function fmtNum(num) {
  const scaled = num * 10000;
  return Math.floor(scaled);
}

// result[0] total population, result[1] male, result[2] female, result[3] city, result[4] township
/**
 * Turn the ordered list of figures into a named record, passing each
 * figure through `fmtNum`.
 *
 * @param {number[]} result - Figures in the order: total population, male,
 *   female, city, township.
 * @returns {{total: number, manCount: number, womanCount: number,
 *   cityCount: number, villageCount: number}} The formatted figures.
 */
function dealRes(result) {
  return {
    total: fmtNum(result[0]),
    manCount: fmtNum(result[1]),
    womanCount: fmtNum(result[2]),
    cityCount: fmtNum(result[3]),
    villageCount: fmtNum(result[4]),
  };
}

// execution
// Run the query for 2011 and print the formatted result.
getTotal(2011)
  .then((res) => {
    console.log(res);
  })
  .catch((err) => {
    // Report a failed request or an unexpected response shape instead of
    // leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
Copy the code

Start it with node ~~


I searched again and found that someone had already crawled this website with Python and written the whole process up: "Crawling the relevant data of the National Bureau of Statistics with Python (original)". After reading that expert's code, I understood what the problem was.

Each of our requests stood alone: it fetched data and that was the end of it. To run a query we first have to establish a session, and then change `dfwds` to the content shown in the picture above; only then does the request succeed.


I see! I searched and found axios-cookiejar-support, reconfigured axios with it, and was then able to crawl the birth and death rates as well. The only difference is the valuecode.

  • Total population: valuecode = A0301
  • Birth rate: valuecode = A0302

Other data, such as GDP, household consumption level, industry and agriculture, are likewise selected by their own valuecode values.

Here is the full code:

const axios = require('axios');
const tough = require('tough-cookie');
const axiosCookieJarSupport = require('axios-cookiejar-support').default;
// Enable cookie-jar support on axios and make one shared jar the default,
// so that cookies set by one request are sent with the following ones.
axiosCookieJarSupport(axios);
const cookieJar = new tough.CookieJar();
axios.defaults.jar = cookieJar;
axios.defaults.withCredentials = true;

/**
 * Crawl target: http://data.stats.gov.cn/easyquery.htm?cn=C01
 * Reference Python crawler: https://www.jianshu.com/p/9827a052da91
 *
 * The dependencies above are there to establish a session: each real data
 * request must be preceded by a request that sets the session up.
 *
 * Values transmitted in that first request:
 *   valuecode=A0301 for total population
 *   valuecode=A0302 for birth rate
 */
/** Request settings shared by every call to the query endpoint. */
const commonCfg = {
  method: 'get',
  url: 'http://data.stats.gov.cn/easyquery.htm',
  headers: {
    // Browser-like User-Agent string sent with each request.
    'User-Agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)',
  },
};

/**
 * Query-string parameters sent with every request. Callers spread this
 * object and add their own `dfwds` entry.
 */
const params = {
  m: 'QueryData',
  dbcode: 'hgnd',
  rowcode: 'zb',
  colcode: 'sj',
  wds: '[]',
  // Current timestamp as a string (presumably a cache-busting value; confirm).
  k1: String(Date.now()),
};

// Establish session before the total population request
/**
 * Send the preliminary request for indicator A0301 (total population),
 * then fetch the figures for the given year.
 *
 * @param {number|string} year - Year passed on to `getTotal`.
 * @returns {Promise<*>} The value `getTotal(year)` resolves to.
 */
async function initTotal(year) {
  // The response is not used; the request is made only for its effect on
  // the session.
  await axios({
    ...commonCfg,
    params: { ...params, dfwds: '[{"wdcode":"zb","valuecode":"A0301"}]' },
  });
  return getTotal(year);
}
/**
 * Total population: male/female, urban/rural.
 *
 * Requests the figures for one year and hands them to `dealRes`.
 *
 * @param {number|string} year - Year to query; sent as the `sj` value code.
 * @returns {Promise<*>} Whatever `dealRes` returns for the collected values.
 */
async function getTotal(year) {
  const response = await axios({
    ...commonCfg,
    params: {
      ...params,
      // Serialised as JSON so the quoting and brackets are always well formed.
      dfwds: JSON.stringify([{ wdcode: 'sj', valuecode: String(year) }]),
    },
  });
  const { datanodes } = response.data.returndata;
  // Each node carries its figure at `data.data`; keep them in response order.
  const result = datanodes.map((item) => item.data.data);
  return dealRes(result);
}
/**
 * Format a figure: multiply it by 10,000 and round down to a whole number.
 *
 * @param {number} num - Figure to scale (presumably reported in units of
 *   ten thousand; confirm against the data source).
 * @returns {number} `num * 10000`, rounded down.
 */
function fmtNum(num) {
  return Math.floor(num * 10000);
}

// result[0] total population, result[1] male, result[2] female, result[3] city, result[4] township
/**
 * Turn the ordered list of figures into a named record, passing each
 * figure through `fmtNum`.
 *
 * @param {number[]} result - Figures in the order: total population, male,
 *   female, city, township.
 * @returns {{total: number, manCount: number, womanCount: number,
 *   cityCount: number, villageCount: number}} The formatted figures.
 */
function dealRes(result) {
  return {
    total: fmtNum(result[0]),
    manCount: fmtNum(result[1]),
    womanCount: fmtNum(result[2]),
    cityCount: fmtNum(result[3]),
    villageCount: fmtNum(result[4]),
  };
}

// Establish session before birth/death request
/**
 * Send the preliminary request for indicator A0302 (birth rate), then
 * fetch the rates for the given year.
 *
 * @param {number|string} year - Year passed on to `getRate`.
 * @returns {Promise<*>} The value `getRate(year)` resolves to.
 */
async function initRate(year) {
  // The response is not used; the request is made only for its effect on
  // the session.
  await axios({
    ...commonCfg,
    params: { ...params, dfwds: '[{"wdcode":"zb","valuecode":"A0302"}]' },
  });
  return getRate(year);
}
/**
 * Birth and death rates.
 *
 * Requests the figures for one year and returns the first two of them.
 *
 * @param {number|string} year - Year to query; sent as the `sj` value code.
 * @returns {Promise<{bornRate: *, deathRate: *}>} The first returned figure
 *   as `bornRate` and the second as `deathRate`, unmodified.
 */
async function getRate(year) {
  const response = await axios({
    ...commonCfg,
    params: {
      ...params,
      // Serialised as JSON so the quoting and brackets are always well formed.
      dfwds: JSON.stringify([{ wdcode: 'sj', valuecode: String(year) }]),
    },
  });
  const { datanodes } = response.data.returndata;
  // Each node carries its figure at `data.data`; keep them in response order.
  const result = datanodes.map((item) => item.data.data);
  return {
    bornRate: result[0],
    deathRate: result[1],
  };
}

// execution
const YEAR = 2011;

(async () => {
  // Run the two flows one after the other rather than concurrently: both go
  // through the same default cookie jar, and each starts with a request that
  // selects a different indicator. NOTE(review): assumes the server keeps
  // the selected indicator per session - confirm.
  const total = await initTotal(YEAR);
  const rate = await initRate(YEAR);
  const sql = { year: YEAR, ...total, ...rate };
  console.log(sql);
})().catch((err) => {
  // Report a failed request or an unexpected response shape instead of
  // leaving the rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
Copy the code

The printed output:

We now have the total population, the male and female populations, the urban and rural populations, and the birth and death rates. Perfect! ؏؏☝ᖗ乛◡乛ᖘ☝؏؏