Use AST to elegantly customize your code
AST profile
AST (Abstract Syntax Tree) is an Abstract representation of the Syntax structure of source code. It represents the syntactic structure of a programming language as a tree, with each node in the tree representing a structure in the source code. In short, it is a tree-structured representation of the source code.
AST principle
Generating an AST is generally divided into the following steps:
Lexical analysis
Splits the entire code string into an array of minimal syntax units
// Source code
const foo = 'hello world';
// Tokens
[
  { "type": "Keyword", "value": "const" },
  { "type": "Identifier", "value": "foo" },
  { "type": "Punctuator", "value": "=" },
  { "type": "String", "value": "'hello world'" }
]
Syntax analysis
Building on the token stream, syntax analysis establishes the relationships between grammatical units, combining the tokens into a hierarchical structure that determines how they relate to one another and what they ultimately express. To put it simply, parsing is a recursive process of identifying statements and expressions and determining the hierarchical relationships between them.
Hand stroking a minimalist AST
In order to better understand the principle of AST, here I refer to the Acorn library to make a minimal version of the code parser, through this Demo we can understand the code parser working principle.
Here’s the code to parse this time, and the implementation only supports these two syntaxes.
import moduleA from "./moduleA.js";
import moduleB from "./moduleB.js";
function add(v1, v2) { return v1 + v2 }
Copy the code
Lexical analysis
As can be seen from the above code, the following tokens appear
- Key words:
import
,from
,function
,return
- Common name:
moduleA
,moduleB
,add
,v1
,v2
- Partial symbols:
- Semi-colon (semi) :
;
- Comma:
.
- Dot:
.
- ParenL:
(
- ParenR:
)
- Open brace (braceL) :
{
- BraceR:
}
- Plus sign (plusMin) :
+
- Semi-colon (semi) :
- Terminator:
eof
The following word segmentation for this string of source code, the specific code is as follows:
// Enumeration of keywords and symbols recognised by the lexer
const types = {
  keyword: ['import', 'from', 'function', 'return'],
  punctuation: [';', ',', '.', '(', ')', '{', '}', '+', '-']
};

// Classify a lexeme as Keyword, Punctuation or Identifier
function getTokenType(token) {
  if (types.keyword.includes(token)) {
    return 'Keyword';
  } else if (types.punctuation.includes(token)) {
    return 'Punctuation';
  } else {
    return 'Identifier';
  }
}

// Word segmentation: split the input source string into an array of tokens
export function tokenizer(input) {
  // Cursor tracking our current position in the code
  let current = 0;
  // Result array of tokens
  let tokens = [];
  // Lexeme currently being accumulated
  let token = '';
  // Whether we are inside a double-quoted string literal
  let stringStart = false;
  while (current < input.length) {
    const char = input[current];
    // Skip whitespace between tokens (nothing accumulated yet)
    if (/\s/.test(char) && !token) {
      current++;
      continue;
    }
    // A double quote toggles string mode
    if (char === '"') {
      if (!stringStart) {
        stringStart = true;
        token += char;
      } else {
        // Closing quote: emit the complete string literal
        stringStart = false;
        token += char;
        tokens.push({
          type: 'String',
          value: token
        });
        token = '';
      }
      current++;
      continue;
    }
    // Whitespace terminates the pending lexeme and emits it
    if (/\s/.test(char) && !stringStart) {
      tokens.push({
        type: getTokenType(token),
        value: token
      });
      token = '';
      current++;
      continue;
    }
    // Punctuation terminates the pending lexeme AND is a token itself
    if (/[;,.(){}]/.test(char) && !stringStart) {
      if (token) {
        tokens.push({
          type: getTokenType(token),
          value: token
        });
      }
      tokens.push({
        type: 'Punctuation',
        value: char
      });
      token = '';
      current++;
      continue;
    }
    // Otherwise keep accumulating the current lexeme
    token += char;
    current++;
  }
  // NOTE: a lexeme at the very end of the input (no trailing whitespace or
  // punctuation) is never flushed — a known limitation of this demo lexer.
  return tokens;
}
Copy the code
The results after word segmentation are as follows:
[
  { type: 'Keyword', value: 'import' },
  { type: 'Identifier', value: 'moduleA' },
  { type: 'Keyword', value: 'from' },
  { type: 'String', value: '"./moduleA.js"' },
  { type: 'Punctuation', value: ';' },
  { type: 'Keyword', value: 'import' },
  { type: 'Identifier', value: 'moduleB' },
  { type: 'Keyword', value: 'from' },
  { type: 'String', value: '"./moduleB.js"' },
  { type: 'Punctuation', value: ';' },
  { type: 'Keyword', value: 'function' },
  { type: 'Identifier', value: 'add' },
  { type: 'Punctuation', value: '(' },
  { type: 'Identifier', value: 'v1' },
  { type: 'Punctuation', value: ',' },
  { type: 'Identifier', value: 'v2' },
  { type: 'Punctuation', value: ')' },
  { type: 'Punctuation', value: '{' },
  { type: 'Keyword', value: 'return' },
  { type: 'Identifier', value: 'v1' },
  { type: 'Punctuation', value: '+' },
  { type: 'Identifier', value: 'v2' },
  { type: 'Punctuation', value: '}' },
  { type: 'Identifier', value: 'add' },
  { type: 'Punctuation', value: '(' },
  { type: 'Identifier', value: 'moduleA' },
  { type: 'Punctuation', value: '.' },
  { type: 'Identifier', value: 'val' },
  { type: 'Punctuation', value: ',' },
  { type: 'Identifier', value: 'moduleB' },
  { type: 'Punctuation', value: '.' },
  { type: 'Identifier', value: 'val' },
  { type: 'Punctuation', value: ')' },
  { type: 'Punctuation', value: ';' }
]
Syntax analysis
Parsing typically uses regular expressions (such as template parsing in Vue) or finite state machines
Finite-state machines are a very useful model for simulating most things in the world.
It has three characteristics:
- The total number of states is finite.
- In one state at any one time.
- Under certain conditions, they transition from one state to another.
The following parser for the above code implementation, the general idea is to conceive the various states encountered during the parsing process, by maintaining the state of the state machine to perform different operations.
// The parser: converts the token stream into an EsTree-like AST
// by driving a simple finite state machine.
export function parse(tokens) {
  // AST root node
  const program = {
    type: 'Program',
    body: []
  };
  // Index of the token currently being examined
  let current = 0;
  // The statement node currently being assembled
  let statement = {};
  /**
   * States of the machine:
   * ImportDeclaration: inside an import statement
   * FunctionDeclaration: inside a function declaration
   * ParamsDeclaration: inside a function's parameter list
   * BodysDeclaration: inside a function body
   * ReturnDeclaration: inside a return statement
   */
  let currentState = 'default';
  while (current < tokens.length) {
    // The current token
    const token = tokens[current];
    // The previous token (undefined for the first one)
    const preToken = tokens[current - 1];
    switch (token.type) {
      // Keywords drive the state transitions
      case 'Keyword':
        switch (token.value) {
          case 'import':
            currentState = 'ImportDeclaration';
            statement.type = 'ImportDeclaration';
            // Array because `import { a, b }` can bind several names
            statement.specifiers = [];
            // Module the import comes from
            statement.source = {};
            break;
          case 'function':
            currentState = 'FunctionDeclaration';
            statement.type = 'FunctionDeclaration';
            // Function name
            statement.id = {};
            // Parameter list
            statement.params = [];
            // Only block-bodied functions are supported here, so the
            // BlockStatement shape is hard-coded (no arrow functions).
            statement.body = {
              type: 'BlockStatement',
              body: []
            };
            break;
          case 'return':
            currentState = 'ReturnDeclaration';
            break;
        }
        current++;
        continue;
      // Punctuation either terminates a statement or switches state
      case 'Punctuation':
        switch (token.value) {
          case ';':
            // End of a statement: commit it to the program body
            program.body.push(statement);
            statement = {};
            break;
          case '(':
            // Entering a function's parameter list
            if (currentState === 'FunctionDeclaration') {
              currentState = 'ParamsDeclaration';
            }
            break;
          case ')':
            // Leaving the parameter list, back to the function itself
            if (currentState === 'ParamsDeclaration') {
              currentState = 'FunctionDeclaration';
            }
            break;
          case '{':
            // Entering the function body
            if (currentState === 'FunctionDeclaration') {
              currentState = 'BodysDeclaration';
            }
            break;
          case '}':
            // Leaving the function body
            if (currentState === 'BodysDeclaration') {
              currentState = 'FunctionDeclaration';
            }
            break;
        }
        current++;
        continue;
      // Identifiers are interpreted according to the current state
      case 'Identifier':
        switch (currentState) {
          case 'ImportDeclaration':
            // The identifier right after `import` is the default binding
            if (preToken && preToken.value === 'import') {
              statement.specifiers.push({
                type: 'ImportDefaultSpecifier',
                local: {
                  type: token.type,
                  name: token.value
                }
              });
            }
            break;
          case 'FunctionDeclaration':
            // Fill in the function name
            statement.id = {
              type: 'Identifier',
              name: token.value
            };
            break;
          case 'ParamsDeclaration':
            // Collect one parameter
            statement.params.push({
              type: 'Identifier',
              name: token.value
            });
            break;
          case 'BodysDeclaration':
            // Not every function body starts with `return`; statements
            // other than `return` inside a body are ignored in this demo.
            break;
          case 'ReturnDeclaration': {
            // Grab every token up to the closing brace of the body
            const closeBraceOffset = tokens.slice(current).findIndex(e => e.value === '}');
            const expression = tokens.slice(current, current + closeBraceOffset).map(e => e.value);
            // Only binary expressions (a <op> b) are recognised
            if (/\S*[\+|\-|\*|\/|\%]\S*/.test(expression.join(''))) {
              statement.body.body.push({
                type: 'ReturnStatement',
                argument: {
                  type: 'BinaryExpression',
                  left: {
                    type: 'Identifier',
                    name: expression[0]
                  },
                  operator: expression[1],
                  right: {
                    type: 'Identifier',
                    name: expression[2]
                  }
                }
              });
            }
            // Functions have no trailing `;`, so commit the statement here
            // and reset it so a following statement starts clean.
            program.body.push(statement);
            statement = {};
            // Skip past the consumed expression tokens
            current += closeBraceOffset;
            break;
          }
        }
        current++;
        continue;
      // Strings only appear as import sources in the supported syntax
      case 'String':
        switch (currentState) {
          case 'ImportDeclaration':
            if (preToken && preToken.value === 'from') {
              // The token after `from` is the imported module's path
              statement.source = {
                type: 'Literal',
                // Strip the surrounding quotes for the value
                value: token.value.slice(1, -1),
                raw: token.value
              };
            }
            break;
        }
        current++;
        continue;
    }
    // A real parser would raise a syntax error on an unmatched token;
    // this demo simply skips unsupported syntax.
    current++;
  }
  return program;
}
Copy the code
Here is the syntax tree after the parser parses the word segmentation results. Here I refer to acorn’s syntax splicing
{
  "type": "Program",
  "body": [
    {
      "type": "ImportDeclaration",
      "specifiers": [
        {
          "type": "ImportDefaultSpecifier",
          "local": { "type": "Identifier", "name": "moduleA" }
        }
      ],
      "source": {
        "type": "Literal",
        "value": "./moduleA.js",
        "raw": "\"./moduleA.js\""
      }
    },
    {
      "type": "ImportDeclaration",
      "specifiers": [
        {
          "type": "ImportDefaultSpecifier",
          "local": { "type": "Identifier", "name": "moduleB" }
        }
      ],
      "source": {
        "type": "Literal",
        "value": "./moduleB.js",
        "raw": "\"./moduleB.js\""
      }
    },
    {
      "type": "FunctionDeclaration",
      "id": { "type": "Identifier", "name": "add" },
      "params": [
        { "type": "Identifier", "name": "v1" },
        { "type": "Identifier", "name": "v2" }
      ],
      "body": {
        "type": "BlockStatement",
        "body": [
          {
            "type": "ReturnStatement",
            "argument": {
              "type": "BinaryExpression",
              "left": { "type": "Identifier", "name": "v1" },
              "operator": "+",
              "right": { "type": "Identifier", "name": "v2" }
            }
          }
        ]
      }
    }
  ]
}
Here we have implemented the simplest JS parser, and you can see that actually implementing a fully usable JS parser is quite a huge project. So standing on the shoulders of giants, we’ll go straight to existing mature JS parsers.
Common JS parser
Esprima
The first JavaScript parser written in JavaScript to comply with the EsTree specification, and many subsequent compilers have been influenced by it.
Acorn
Acorn is similar to Esprima in that the output AST is EsTree compliant. The ast parser in WebPack currently uses Acorn and, like Esprima, does not support JSX directly.
@babel/parser(babylon)
Babel’s official parser, originally forked by Acorn, has gone its own way entirely, and the plug-in architecture it has built since rebranding Babylon is very powerful.
Espree
The default parser for ESLint, Prettier, was originally forked from Esprima (V1.2.2) and later developed for Acorn because Esprima did not support it for a short time due to the rapid development of ES6.
Use AST to elegantly modify your code
Once we understood how the JS parser generated the AST, we began to use the AST to elegantly modify our code to achieve the functionality we wanted. And I’m going to give you two examples to do that.
tree shaking
To understand what an AST does, let’s implement a simple tree shaking.
- First, obtain the source code that needs tree shaking
- Convert the source code into an AST via acorn
- Find all variable declarations and function declarations and store them in decls
- Find all call expressions and store them in calledDecls
- Traverse decls to find the unused nodes and collect them
- Remove those unused nodes from the AST
- Regenerate the code from the pruned AST
First, let’s assume that the following code is the source code for tree shaking where the Subtract method and num3 variable are not used.
// Sample input for tree shaking: `subtract` and `num3` are never used
const add = (a, b) => a + b;
function subtract(a, b) { return a - b }
const num1 = 1;
const num2 = 10;
const num3 = 100;
add(num1, num2);
Copy the code
Then we wrap the tree-shaking method. The general idea here is the opposite of the useful nodes in webpack’s Tree Shaking tag. I find all the variable declarations and function declarations, and then I find all the expressions that use functions and variables. Finally, unneeded nodes are removed from the AST and the AST is regenerated using EscodeGen. The specific code is as follows:
import * as acorn from 'acorn';
import escodegen from 'escodegen';
// Tree-shaking method: removes top-level declarations that are never used.
// NOTE: only tracks direct identifier use as a callee or call argument at
// the top level — nested references are not followed (intentional for demo).
export function shaking(input) {
  // decls maps an identifier node -> the declaration node that owns it
  const decls = new Map();
  // calledDecls collects top-level call-expression statements
  const calledDecls = [];
  // unused collects declaration nodes nothing references
  const unused = [];
  // Parse the source into an EsTree-compliant AST (module mode)
  const program = acorn.parse(input, {
    sourceType: 'module'
  });
  // First pass: collect declarations and call expressions
  for (const node of program.body) {
    switch (node.type) {
      // A single statement may declare several variables (const a = 1, b = 2)
      case 'VariableDeclaration':
        for (const decl of node.declarations) {
          decls.set(decl.id, node);
        }
        break;
      case 'FunctionDeclaration':
        decls.set(node.id, node);
        break;
      case 'ExpressionStatement':
        if (node.expression.type === 'CallExpression') {
          calledDecls.push(node);
        }
        break;
    }
  }
  // Second pass: a declaration survives if its name appears as a callee
  // or as an argument of any collected call expression
  for (const [id, node] of decls) {
    const usedAsCallee = calledDecls.some(e => e.expression.callee.name === id.name);
    const usedAsArgument = calledDecls.some(e => e.expression.arguments.some(arg => arg.name === id.name));
    if (!usedAsCallee && !usedAsArgument) {
      unused.push(node);
    }
  }
  // Drop unused nodes by identity (they came from program.body itself),
  // avoiding the original O(n^2) JSON.stringify comparison
  const unusedSet = new Set(unused);
  program.body = program.body.filter(node => !unusedSet.has(node));
  // Regenerate source code from the pruned AST
  return escodegen.generate(program);
}
Copy the code
The output here is:
import fs from 'fs';
import { shaking } from './treeshaking.js';

// Read the sample source, shake it, and print the result
const sourceCode = fs.readFileSync('./tree-shaking/source.js').toString();
const treeshaking = shaking(sourceCode);
console.log(treeshaking);
/**
 * Expected output:
 * const add = (a, b) => a + b;
 * const num1 = 1;
 * const num2 = 10;
 * add(num1, num2);
 */
Copy the code
Static image upload replacement
We know that using CDN is a common optimization point in front-end performance optimization. Generally, CDN sets up multiple node servers, which are distributed in different areas, facilitating data transmission and access by users. When a node has a problem, data transmission can still be completed through other nodes, which can improve the response speed of users to visit the website. And the current cloud providers basically adopt “distributed storage”, distribute the content of the central platform to the edge servers around, so that users can get the content nearby, reduce the network time, improve the user access response speed and hit ratio. Using techniques such as indexing and caching.
A typical example: when developing mini-programs, the packaged bundle must not exceed 2 MB, so uploading images to a CDN is an effective way to reduce bundle size. At the same time, to keep local development convenient and efficient, we would like to keep using local images while developing. We can therefore use the AST to implement a custom webpack loader that uploads static images to a CDN and rewrites their links at build time.
Here I use Vue’s project as an example. The steps are as follows
Initialize the VUE project
Modify the app. vue code as follows, here only the introduction of logo.png image is retained
<template>
<div>
<img alt="Vue logo" src="./assets/logo.png" />
</div>
</template>
<script lang="ts">
import { defineComponent } from "vue";
export default defineComponent({
name: "App",
});
</script>
Copy the code
Implementing OSS upload
Create a plugins/oss-upload.js file where we encapsulate the ali-OSS upload method
const OSS = require('ali-oss');
// Uniform prefix
const prePath = 'web-image'
let client = new OSS({
accessKeyId: 'your accessKeyId'.accessKeySecret: 'your accessKeySecret'.bucket: 'wynne-typora'.region: 'oss-cn-beijing'
})
// Upload method
module.exports = function ossUpload(fileName, fileUrl) {
return client.put(`${prePath}/${fileName}`, fileUrl);
}
Copy the code
Implement a custom Loader
Create plugins/oss-image-upload.js file, which is used as the function of our custom loader. The specific ideas are as follows:
- First, extract the HTML inside the template block
- Parse the HTML with htmlparser2, generating a JSON tree
- Traverse the JSON, find each img tag, and upload the image its src points to
- After uploading, replace the src path with the Aliyun OSS path
- Regenerate the HTML via htmlparser-to-html
- Write the result back into the resource file
const HtmlParser = require("htmlparser2");
const HtmlparserToHtml = require('htmlparser-to-html');
const OssUpload = require('./oss-upload');
const path = require('path')
module.exports = function (source) {
// Start the asynchronous Loader
const callback = this.async();
// Get the template template content
const template = source.match(/<template>[\s\S]*<\/template>/) [0];
/ / for HTML
const html = template.replace(/ < / /? template>/g.' ');
// Obtain the resource path
let htmlJson = HtmlParser.parseDocument(html);
/ / binding
traverseElement = traverseElement.bind(this);
// Walk through the DOM and upload the image
traverseElement(htmlJson.childNodes).then(() = > {
// Convert JSON back to HTML
const cdnHtml = HtmlparserToHtml(htmlJson.childNodes)
/ / replace the template
source = source.replace(/<template>[\s\S]*<\/template>/.`<template>${cdnHtml}<\/template>`)
// Returns the loader result
callback(null, source)
})
};
// Traverse the DOM node and upload the image
async function traverseElement(elements) {
const resourcesPath = path.parse((this.resourcePath)).dir;
for (let node of elements) {
// If tap is an image and has a SRC field
if (node.type === 'tag' && node.name === 'img' && node.attribs && node.attribs.src) {
// Get the image path
const filePath = path.resolve(resourcesPath, node.attribs.src)
// Get the file name
const fileName = path.parse(filePath).base;
// Enable upload
node.attribs.src = await imageUpload(fileName, filePath);
}
// If there are subsets, continue recursing
if (node.children) {
awaittraverseElement(node.children); }}}// Upload to OSS and output logs
async function imageUpload(fileName, filePath) {
return new Promise((resolve, reject) = > {
/ / upload OSS
OssUpload(fileName, filePath).then(res= > {
if (res.res.status === 200) {
console.log(` \ r \ n resources${fileName}Successfully uploaded ')}else {
console.log(` \ r \ n resources${filePath}Upload failed ', res.res.statusMessage) } resolve(res.url); })})}Copy the code
Local Loader Debugging
We initialize the new vue. Config. js in the newly initialized vue project, and the new loader here points to the local loader file we wrote
const path = require('path')
module.exports = {
chainWebpack: (config) = > {
config.module
.rule('oss-image-upload')
.test(/\.vue$/)
.use('oss-image-upload')
.loader(path.resolve(__dirname,'./plugins/oss-image-upload'))
.end()
}
}
Copy the code
After executing `npm run build`, you can see that the custom loader has taken effect
And then let’s deploy the project and open it up and see what happens