secondparse

The basic parsing function can only parse simple, valid HTML strings like `<div></div>`, but it is the foundation for everything that follows. To parse attributes as well, we need two key regular expressions:

// Matches one plain tag attribute at the start of the string.
// Capture groups: [1] attribute name, [2] the "=" sign (when a value follows),
// [3] double-quoted value, [4] single-quoted value, [5] unquoted value.
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/;

// Matches one attribute whose name carries a dynamic argument in square brackets,
// introduced by "v-xxx:", "@", ":" or "#" (e.g. v-bind:[key], :[key], @[event], #[slot]).
// Capture groups are laid out exactly as in `attribute`.
const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/;

These regular expressions match the attributes written on a tag; with them, you can extract any attribute the tag carries. As a reminder, here is the basic parser we start from:

/**
 * Parses a single, simple HTML element (e.g. `<div></div>` or `<br/>`) into an AST node.
 *
 * Relies on three regular expressions defined outside this snippet: `startTag`,
 * `startTagClose` and `endTag`. As used here, `startTag` and `endTag` capture the
 * tag name in group 1, and `startTagClose` captures the "/" of a self-closing
 * tag in group 1.
 *
 * @param {string} input - The HTML string to parse.
 * @returns {{tagName: string} | null} The node for the parsed tag, or null when the
 *     input is empty, the end tag does not match the start tag, or some part of
 *     the input cannot be consumed (text content, a stray "<", attributes, ...).
 */
function parse(input) {
    let root = null // AST node produced by parsing
    let tagName = '' // Name of the tag currently being parsed
    // Keep going until the whole string has been consumed
    while (input) {
        const lengthBefore = input.length
        if (input.startsWith('<')) { // "<" may open a start tag, an end tag, or be a stray "<"
            // First try to match a start tag
            const match = input.match(startTag)
            if (match) {
                // It is a start tag: consume "<name"
                input = input.slice(match[0].length)
                // Check whether the start tag is closed properly
                const closeStart = input.match(startTagClose)
                if (closeStart) {
                    // Consume the closing ">" or "/>" exactly once
                    input = input.slice(closeStart[0].length)
                    root = {
                        tagName: match[1]
                    }
                    if (closeStart[1] === '/') {
                        // Self-closing tag: there is no end tag to wait for
                        continue
                    }
                    tagName = root.tagName
                }
            }
            const matchEnd = input.match(endTag)
            if (matchEnd) {
                // An end tag was matched
                if (matchEnd[1] !== tagName) {
                    // The end tag does not pair with the start tag: the markup is invalid, discard it
                    root = null
                    break
                }
                input = input.slice(matchEnd[0].length)
            }
        }
        if (input.length === lengthBefore) {
            // Nothing was consumed in this pass, so the rest of the input is not
            // something this parser understands. Give up instead of looping forever.
            root = null
            break
        }
    }
    return root
}

// Try the basic parser on the simplest valid input: an empty element.
console.log('parse', parse('<div></div>'));

Since attributes live on the start tag, we only need to modify the start-tag matching step and add code that matches and parses the attributes:

Note that, according to Vue's rules, an attribute such as v-bind:[xxx]="ee" is a dynamic attribute: the name inside the square brackets is evaluated at runtime.

/**
 * Parses a single HTML element, including the attributes on its start tag, into an AST node.
 *
 * Relies on regular expressions defined outside this function: `startTag`,
 * `startTagClose`, `endTag`, `attribute` and `dynamicArgAttribute`. As used here,
 * `startTag` and `endTag` capture the tag name in group 1, `startTagClose`
 * captures the "/" of a self-closing tag in group 1, and the two attribute
 * expressions capture the name in group 1 and the value in group 3, 4 or 5.
 *
 * @param {string} input - The HTML string to parse.
 * @returns {{tagName: string, attrList: Array<{name: string, value: (string|undefined)}>} | null}
 *     The node for the parsed tag, or null when the input is empty, the end tag
 *     does not match the start tag, or some part of the input cannot be consumed.
 *     An attribute without a value (or with an empty one) gets `value: undefined`.
 */
function parse(input) {
    let root = null // AST node produced by parsing
    let tagName = '' // Name of the tag currently being parsed
    // Keep going until the whole string has been consumed
    while (input) {
        const lengthBefore = input.length
        if (input.startsWith('<')) { // "<" may open a start tag, an end tag, or be a stray "<"

            // First try to match a start tag
            const match = input.match(startTag)
            if (match) {
                // It is a start tag: consume "<name"
                input = input.slice(match[0].length)
                let closeStart = null // Match result for the end of the start tag
                let attr = null // Match result for the attribute currently being read
                // Temporary node: we do not know yet whether the start tag is valid
                const matchNode = {
                    tagName: match[1],
                    attrList: []
                }
                // Read attributes until the end of the start tag is reached.
                // Dynamic-argument attributes are tried first because the plain
                // attribute pattern would also accept their names.
                while (
                    !(closeStart = input.match(startTagClose)) &&
                    (attr = input.match(dynamicArgAttribute) || input.match(attribute))
                ) {
                    // Collect the attribute
                    matchNode.attrList.push({
                        name: attr[1],
                        value: attr[3] || attr[4] || attr[5]
                    })
                    input = input.slice(attr[0].length)
                }
                // Check whether the start tag is closed properly
                if (closeStart) {
                    input = input.slice(closeStart[0].length)
                    // The start tag is closed properly, so the temporary node becomes the result
                    root = { ...matchNode }
                    if (closeStart[1] === '/') {
                        // Self-closing tag: parsing is finished
                        break;
                    }
                    tagName = root.tagName
                }
            }
            const matchEnd = input.match(endTag)
            if (matchEnd) {
                // An end tag was matched
                if (matchEnd[1] !== tagName) {
                    // The end tag does not pair with the start tag: the markup is invalid, discard it
                    root = null
                    break
                }
                input = input.slice(matchEnd[0].length)
            }
        }
        if (input.length === lengthBefore) {
            // Nothing was consumed in this pass, so the rest of the input is not
            // something this parser understands. Give up instead of looping forever.
            root = null
            break
        }
    }
    return root
}

// Try the parser on a tag that mixes a plain attribute, bound/directive
// attributes and a dynamic-argument attribute.
console.log('parse', parse('<div id="app" :b="c" v-html="d" :[xxx] = "e"></div>'));

In this case, our target AST will look like this:

// Target AST for the example tag: the tag name plus one { name, value }
// entry per attribute, in source order.
const root = {
    tagName: 'div',
    attrList: [
        { name: 'id', value: 'app' },
        { name: ':b', value: 'c' },
        { name: 'v-html', value: 'd' },
        { name: ':[xxx]', value: 'e' }
    ]
}

To get the structure above, the tag must be a valid tag.

Here is the core code that parses the attributes:

// Excerpt from inside parse(): runs right after the "<name" part of a start
// tag has been consumed. `input`, `closeStart`, `attr` and `matchNode` are
// locals of parse(); the regular expressions are defined outside it.
while (
    !(closeStart = input.match(startTagClose)) && // Not yet at the end of the start tag
    (attr = input.match(dynamicArgAttribute) || input.match(attribute)) // An attribute can be matched
) {
    // Collect the attribute: group 1 is the name; the value is in group 3, 4 or 5
    matchNode.attrList.push({
        name: attr[1],
        value: attr[3] || attr[4] || attr[5]
    })
    // Consume the attribute text that was just read
    input = input.slice(attr[0].length)
}

At the same time, the way the start-tag node is created has to change:

let closeStart = null // Records the match result at the end of the start tag
let attr = null  // Records the current matched attribute match
let matchNode = {  // Create a temporary start tag because you don't know if it is valid
    tagName: match[1].attrList: []}// Match attributes once
while(
    !(closeStart = input.match(startTagClose)) && 
    (attr = input.match(dynamicArgAttribute) || input.match(attribute))
){
    // Collect attributes
    matchNode.attrList.push({
        name: attr[1].value: attr[3] || attr[4] || attr[5]
    })
    input = input.slice(attr[0].length)
}
// Check whether the label is closed properly
if(closeStart){
    input = input.slice(closeStart[0].length)
    // Indicates that the label is closed properlyroot = { ... matchNode }if(closeStart[1= = ='/') {// Indicates a self-closing label
        break;
    }
    tagName = root.tagName
}
Copy the code

Conclusion

At this point, attribute parsing has been added to the parser. Because it builds on the foundation laid in the previous article, adding it was straightforward. The next chapter will add child-element parsing, which requires some knowledge of stacks. The flow chart for this section is here