secondparse
The basic parsing function can only parse simple, well-formed HTML strings such as `<div></div>`, but it is the foundation for everything that follows.
To parse attributes on top of it, we need two key regular expressions:
// Matches a single tag attribute at the start of the string.
// Capture groups: 1 = attribute name, 2 = the "=" sign,
// 3 = double-quoted value, 4 = single-quoted value, 5 = unquoted value.
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
// Matches a single attribute with a dynamic argument: a `v-xxx:`, `@`, `:` or `#`
// prefix followed by a bracketed expression, e.g. `:[key]` or `v-bind:[key]`.
// Capture groups are laid out exactly as in `attribute`.
const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
Copy the code
These regular expressions match the attributes on a tag; with them you can extract every attribute the tag carries. The basic parsing function we start from is:
/**
 * Parse a single, attribute-free HTML element such as `<div></div>` or `<br/>`.
 *
 * Relies on the `startTag`, `startTagClose` and `endTag` regular expressions
 * being in scope; they are not defined in this snippet.
 *
 * @param {string} input - HTML source to parse.
 * @returns {{tagName: string}|null} The parsed node, or `null` when the input
 *   is empty, the end tag does not match the start tag, or the input contains
 *   anything this basic parser cannot consume.
 */
function parse(input) {
  let root = null // AST node produced by the parse
  let tagName = '' // name of the tag that is currently open
  // Keep going until the whole string has been consumed
  while (input) {
    const remainingBefore = input.length
    const textEnd = input.indexOf('<')
    if (textEnd === 0) {
      // `<` may begin a start tag, an end tag, or just be a literal `<`.
      // First try to match a start tag.
      const match = input.match(startTag)
      if (match) {
        input = input.slice(match[0].length)
        // Check whether the start tag is closed properly
        const closeStart = input.match(startTagClose)
        if (closeStart) {
          input = input.slice(closeStart[0].length)
          root = { tagName: match[1] }
          if (closeStart[1] === '/') {
            // Self-closing tag: its closing `/>` has already been consumed
            // above, so nothing more must be sliced off here.
            continue
          }
          tagName = root.tagName
        }
      }
      const matchEnd = input.match(endTag)
      if (matchEnd) {
        if (matchEnd[1] !== tagName) {
          // End tag does not pair with the open start tag: invalid markup
          root = null
          break
        }
        input = input.slice(matchEnd[0].length)
      }
    }
    if (input.length === remainingBefore) {
      // Nothing was consumed (plain text, a stray `<`, an unclosed tag, ...).
      // This basic parser cannot handle it; bail out instead of looping forever.
      root = null
      break
    }
  }
  return root
}
// Demo: run the parser on a minimal element and log the resulting node.
console.log('parse', parse('<div></div>'));
Copy the code
Since attributes live on the start tag, we only need to modify the start-tag matching and add code that matches and parses the attributes:
Note that, according to Vue's rules, an attribute such as `v-bind:[xxx]="ee"` is a dynamic attribute (its argument is dynamic).
/**
 * Parse a single HTML element, including the attributes on its start tag,
 * e.g. `<div id="app" :b="c"></div>` or `<img src="a.png" />`.
 *
 * Relies on the `startTag`, `startTagClose`, `endTag`, `attribute` and
 * `dynamicArgAttribute` regular expressions being in scope.
 *
 * @param {string} input - HTML source to parse.
 * @returns {{tagName: string, attrList: Array<{name: string, value: (string|undefined)}>}|null}
 *   The parsed node, or `null` when the input is empty, the end tag does not
 *   match the start tag, or the input contains anything this parser cannot consume.
 */
function parse(input) {
  let root = null // AST node produced by the parse
  let tagName = '' // name of the tag that is currently open
  // Keep going until the whole string has been consumed
  while (input) {
    const remainingBefore = input.length
    const textEnd = input.indexOf('<')
    if (textEnd === 0) {
      // `<` may begin a start tag, an end tag, or just be a literal `<`.
      // First try to match a start tag.
      const match = input.match(startTag)
      if (match) {
        input = input.slice(match[0].length)
        let closeStart = null // match for the end of the start tag
        let attr = null // match for the attribute currently being read
        // Temporary node: it only becomes `root` once the tag is known to close
        const matchNode = {
          tagName: match[1],
          attrList: []
        }
        while (
          !(closeStart = input.match(startTagClose)) &&
          (attr = input.match(dynamicArgAttribute) || input.match(attribute))
        ) {
          // Collect the attribute. Groups 3/4/5 hold the double-quoted,
          // single-quoted and unquoted value respectively.
          // NOTE: an explicitly empty value (`id=""`) is falsy, so it is stored
          // as `undefined`, just like an attribute written without a value.
          matchNode.attrList.push({
            name: attr[1],
            value: attr[3] || attr[4] || attr[5]
          })
          input = input.slice(attr[0].length)
        }
        // Check whether the start tag is closed properly
        if (closeStart) {
          input = input.slice(closeStart[0].length)
          root = { ...matchNode }
          if (closeStart[1] === '/') {
            // Self-closing tag: the element is complete
            break
          }
          tagName = root.tagName
        }
      }
      const matchEnd = input.match(endTag)
      if (matchEnd) {
        if (matchEnd[1] !== tagName) {
          // End tag does not pair with the open start tag: invalid markup
          root = null
          break
        }
        input = input.slice(matchEnd[0].length)
      }
    }
    if (input.length === remainingBefore) {
      // Nothing was consumed (plain text, a stray `<`, an unclosed tag, ...).
      // This parser cannot handle it; bail out instead of looping forever.
      root = null
      break
    }
  }
  return root
}
// Demo: parse a start tag carrying a plain attribute, a `:`-prefixed attribute,
// a `v-` directive and a dynamic-argument attribute, then log the result.
console.log('parse', parse('<div id="app" :b="c" v-html="d" :[xxx] = "e"></div>'));
Copy the code
In this case, our target AST will look like this:
// Expected AST for '<div id="app" :b="c" v-html="d" :[xxx] = "e"></div>'
root = {
  tagName: 'div',
  attrList: [
    { name: 'id', value: 'app' },
    { name: ':b', value: 'c' },
    { name: 'v-html', value: 'd' },
    { name: ':[xxx]', value: 'e' }
  ]
}
To get the structure above, the tag must be a valid tag.
Here is the core code that parses the attributes:
// Excerpt from inside `parse`: read attributes until the start tag ends.
while (
  !(closeStart = input.match(startTagClose)) && // not yet at the end of the start tag
  (attr = input.match(dynamicArgAttribute) || input.match(attribute)) // an attribute can be matched
) {
  // Collect the attribute: group 1 is the name, groups 3/4/5 hold the
  // double-quoted, single-quoted and unquoted value respectively
  matchNode.attrList.push({
    name: attr[1],
    value: attr[3] || attr[4] || attr[5]
  })
  // Drop the consumed attribute text from the remaining input
  input = input.slice(attr[0].length)
}
Copy the code
At the same time, the way the start-tag node is created is modified:
let closeStart = null // Records the match result at the end of the start tag
let attr = null // Records the current matched attribute match
let matchNode = { // Create a temporary start tag because you don't know if it is valid
tagName: match[1].attrList: []}// Match attributes once
while(
!(closeStart = input.match(startTagClose)) &&
(attr = input.match(dynamicArgAttribute) || input.match(attribute))
){
// Collect attributes
matchNode.attrList.push({
name: attr[1].value: attr[3] || attr[4] || attr[5]
})
input = input.slice(attr[0].length)
}
// Check whether the label is closed properly
if(closeStart){
input = input.slice(closeStart[0].length)
// Indicates that the label is closed properlyroot = { ... matchNode }if(closeStart[1= = ='/') {// Indicates a self-closing label
break;
}
tagName = root.tagName
}
Copy the code
Conclusion
At this point, attribute parsing has been added to the parser. Because it builds on the groundwork laid in the previous article, adding it was straightforward. The next chapter will add parsing of child elements, which involves some knowledge of stacks. The flow chart for this section is here