Author: Ji Zhi
ParserInline
As we saw in ParserCore, after ParserCore processing, a token of type inline is generated. The next step is to hand it to ParserInline. The rule function looks like this:
module.exports = function inline(state) {
var tokens = state.tokens, tok, i, l;
// Parse inlines
for (i = 0, l = tokens.length; i < l; i++) {
tok = tokens[i];
if (tok.type === 'inline') { state.md.inline.parse(tok.content, state.md, state.env, tok.children); }}};Copy the code
ParserInline's parse method is called when a token's type is inline. ParserInline is located in lib/parser_inline.js.
// Rules applied while tokenizing the inline content, as [ name, function ] pairs.
var _rules = [
  [ 'text',            require('./rules_inline/text') ],
  [ 'newline',         require('./rules_inline/newline') ],
  [ 'escape',          require('./rules_inline/escape') ],
  [ 'backticks',       require('./rules_inline/backticks') ],
  [ 'strikethrough',   require('./rules_inline/strikethrough').tokenize ],
  [ 'emphasis',        require('./rules_inline/emphasis').tokenize ],
  [ 'link',            require('./rules_inline/link') ],
  [ 'image',           require('./rules_inline/image') ],
  [ 'autolink',        require('./rules_inline/autolink') ],
  [ 'html_inline',     require('./rules_inline/html_inline') ],
  [ 'entity',          require('./rules_inline/entity') ]
];

// Post-processing rules, run once tokenizing has finished.
var _rules2 = [
  [ 'balance_pairs',   require('./rules_inline/balance_pairs') ],
  [ 'strikethrough',   require('./rules_inline/strikethrough').postProcess ],
  [ 'emphasis',        require('./rules_inline/emphasis').postProcess ],
  [ 'text_collapse',   require('./rules_inline/text_collapse') ]
];

/**
 * Inline parser. Builds two rulers: `ruler` from `_rules` and
 * `ruler2` from `_rules2`.
 */
function ParserInline() {
  var i;

  this.ruler = new Ruler();
  for (i = 0; i < _rules.length; i++) {
    this.ruler.push(_rules[i][0], _rules[i][1]);
  }

  this.ruler2 = new Ruler();
  for (i = 0; i < _rules2.length; i++) {
    this.ruler2.push(_rules2[i][0], _rules2[i][1]);
  }
}
From the constructor, we can see that ParserInline, unlike ParserBlock, has two Ruler instances: ruler is used inside tokenize, and ruler2 is used after tokenize has finished.
/**
 * Scans `state.src` from `state.pos` to `state.posMax`, trying each rule of
 * `this.ruler` at the current position.
 *
 * @param {Object} state - inline parser state; mutated in place.
 */
ParserInline.prototype.tokenize = function (state) {
  var ok, i,
      rules = this.ruler.getRules(''), // '' selects the default rule chain
      len = rules.length,
      end = state.posMax,
      maxNesting = state.md.options.maxNesting;

  while (state.pos < end) {
    // Rules are only tried while the nesting level is below the limit.
    if (state.level < maxNesting) {
      for (i = 0; i < len; i++) {
        ok = rules[i](state, false);
        if (ok) { break; }
      }
    }

    if (ok) {
      if (state.pos >= end) { break; }
      continue;
    }

    // No rule matched: the current character becomes pending plain text.
    state.pending += state.src[state.pos++];
  }

  if (state.pending) {
    state.pushPending();
  }
};

/**
 * Tokenizes `str` into `outTokens`, then runs every rule of `this.ruler2`
 * over the resulting state.
 *
 * @param {string} str - inline content to parse.
 * @param {Object} md - parser instance, stored on the state.
 * @param {Object} env - environment object, stored on the state.
 * @param {Array} outTokens - array that receives the generated tokens.
 */
ParserInline.prototype.parse = function (str, md, env, outTokens) {
  var i, rules, len;
  var state = new this.State(str, md, env, outTokens);

  this.tokenize(state);

  rules = this.ruler2.getRules('');
  len = rules.length;

  for (i = 0; i < len; i++) {
    rules[i](state);
  }
};
By passing a token of type inline to the md.inline.parse method, we are now inside the parse function. The first thing it does is generate the state for ParserInline. Remember the states of ParserCore and ParserBlock? They store the state information of the different parsers during parsing.
Let’s start with the State class, which is located in lib/rules_inline/state_inline.js.
/**
 * State object used while parsing one piece of inline content.
 *
 * @param {string} src - inline content to parse.
 * @param {Object} md - parser instance.
 * @param {Object} env - environment object.
 * @param {Array} outTokens - array that receives the generated tokens.
 */
function StateInline(src, md, env, outTokens) {
  this.src = src;
  this.env = env;
  this.md = md;
  this.tokens = outTokens;

  this.pos = 0;                    // current index into src
  this.posMax = this.src.length;   // end index of src
  this.level = 0;
  this.pending = '';               // plain text collected but not yet pushed as a token
  this.pendingLevel = 0;

  this.cache = {};

  this.delimiters = [];            // delimiter records for markers such as * and ~
}
Copy the code
List some useful fields:
- pos
The string index of the current token’s content
- posMax
Maximum index of the current token’s content
- pending
Store a complete string, such as
let src = "**emphasis**"
let state = new StateInline(src)
// state.pending = 'emphasis'
Copy the code
- delimiters
Store delimiters for special tokens, such as *, ~, etc. The element format is as follows:
{
close:false
end:-1
jump:0
length:2
level:0
marker:42
open:true
token:0
}
// marker indicates the ASCII code corresponding to the string
Copy the code
After state is generated, the tokenize method is called.
/**
 * Scans `state.src` from `state.pos` to `state.posMax`, trying each rule of
 * `this.ruler` at the current position.
 *
 * @param {Object} state - inline parser state; mutated in place.
 */
ParserInline.prototype.tokenize = function (state) {
  var ok, i,
      rules = this.ruler.getRules(''), // '' selects the default rule chain
      len = rules.length,
      end = state.posMax,
      maxNesting = state.md.options.maxNesting;

  while (state.pos < end) {
    // Rules are only tried while the nesting level is below the limit.
    if (state.level < maxNesting) {
      for (i = 0; i < len; i++) {
        ok = rules[i](state, false);
        if (ok) { break; }
      }
    }

    if (ok) {
      if (state.pos >= end) { break; }
      continue;
    }

    // No rule matched: the current character becomes pending plain text.
    state.pending += state.src[state.pos++];
  }

  if (state.pending) {
    state.pushPending();
  }
};
First get the default rule chain, then scan the token's content field from the first character to the end, calling the ruler's rule functions in turn at each position. The rule functions are located in the lib/rules_inline/ folder. They are invoked in the following order:
-
text.js
module.exports = function text(state, silent) { var pos = state.pos; while(pos < state.posMax && ! isTerminatorChar(state.src.charCodeAt(pos))) { pos++; }if (pos === state.pos) { return false; } if(! silent) { state.pending += state.src.slice(state.pos, pos); } state.pos = pos;return true; }; Copy the code
It extracts a run of consecutive characters for which isTerminatorChar returns false. The terminator characters are specified as follows:
/**
 * Reports whether a character code ends a run of plain text.
 *
 * @param {number} ch - character code to test.
 * @returns {boolean} true for the codes listed below, false otherwise.
 */
function isTerminatorChar(ch) {
  switch (ch) {
    case 0x0A/* \n */:
    case 0x21/* ! */:
    case 0x23/* # */:
    case 0x24/* $ */:
    case 0x25/* % */:
    case 0x26/* & */:
    case 0x2A/* * */:
    case 0x2B/* + */:
    case 0x2D/* - */:
    case 0x3A/* : */:
    case 0x3C/* < */:
    case 0x3D/* = */:
    case 0x3E/* > */:
    case 0x40/* @ */:
    case 0x5B/* [ */:
    case 0x5C/* \ */:
    case 0x5D/* ] */:
    case 0x5E/* ^ */:
    case 0x5F/* _ */:
    case 0x60/* ` */:
    case 0x7B/* { */:
    case 0x7D/* } */:
    case 0x7E/* ~ */:
      return true;
    default:
      return false;
  }
}
If the input is “__ad__”, then the rule extracts the “ad” string.
-
newline.js
module.exports = function newline(state, silent) { var pmax, max, pos = state.pos; if(state.src.charCodeAt(pos) ! = =0x0A/* \n */) { return false; } pmax = state.pending.length - 1; max = state.posMax; if(! silent) {if (pmax >= 0 && state.pending.charCodeAt(pmax) === 0x20) { if (pmax >= 1 && state.pending.charCodeAt(pmax - 1) = = =0x20) { state.pending = state.pending.replace(/ + $/.' '); state.push('hardbreak'.'br'.0); } else { state.pending = state.pending.slice(0.- 1); state.push('softbreak'.'br'.0); }}else { state.push('softbreak'.'br'.0); } } pos++; while (pos < max && isSpace(state.src.charCodeAt(pos))) { pos++; } state.pos = pos; return true; }; Copy the code
Handles newline characters (\n).
-
escape.js
module.exports = function escape(state, silent) { var ch, pos = state.pos, max = state.posMax; if(state.src.charCodeAt(pos) ! = =0x5C/ * \ * /) { return false; } pos++; if (pos < max) { ch = state.src.charCodeAt(pos); if (ch < 256&& ESCAPED[ch] ! = =0) { if(! silent) { state.pending += state.src[pos]; } state.pos +=2; return true; } if (ch === 0x0A) { if(! silent) { state.push('hardbreak'.'br'.0); } pos++; // skip leading whitespaces from next line while (pos < max) { ch = state.src.charCodeAt(pos); if(! isSpace(ch)) {break; } pos++; } state.pos = pos; return true; }}if(! silent) { state.pending +='\ \'; } state.pos++; return true; }; Copy the code
Handle escape characters (\).
-
backtick.js
module.exports = function backtick(state, silent) { var start, max, marker, matchStart, matchEnd, token, pos = state.pos, ch = state.src.charCodeAt(pos); if(ch ! = =0x60/ * ` * /) { return false; } start = pos; pos++; max = state.posMax; while (pos < max && state.src.charCodeAt(pos) === 0x60/ * ` * /) { pos++; } marker = state.src.slice(start, pos); matchStart = matchEnd = pos; while ((matchStart = state.src.indexOf('`, matchEnd)) ! = =- 1) { matchEnd = matchStart + 1; while (matchEnd < max && state.src.charCodeAt(matchEnd) === 0x60/ * ` * /) { matchEnd++; } if (matchEnd - matchStart === marker.length) { if(! silent) { token = state.push('code_inline'.'code'.0); token.markup = marker; token.content = state.src.slice(pos, matchStart) .replace(/[ \n]+/g.' ') .trim(); } state.pos = matchEnd; return true; }}if(! silent) { state.pending += marker; } state.pos += marker.length;return true; }; Copy the code
Handles the backtick character (`).
Markdown syntax: `this is inline code`.
-
strikethrough.js
The code is too long, so I will not paste it here; its job is to handle the strikethrough marker (~).
Markdown syntax: ~~strike~~.
-
emphasis.js
Handles emphasis markers (* or _).
Markdown syntax: **strong**.
-
link.js
The function is to parse hyperlinks.
Markdown syntax: [text](href).
-
image.js
The purpose is to parse pictures.
Markdown syntax: ![image](<src> "title").
-
autolink.js
module.exports = function autolink(state, silent) { var tail, linkMatch, emailMatch, url, fullUrl, token, pos = state.pos; if(state.src.charCodeAt(pos) ! = =0x3C/ * (* /) { return false; } tail = state.src.slice(pos); if (tail.indexOf('>') < 0) { return false; } if (AUTOLINK_RE.test(tail)) { linkMatch = tail.match(AUTOLINK_RE); url = linkMatch[0].slice(1.- 1); fullUrl = state.md.normalizeLink(url); if(! state.md.validateLink(fullUrl)) {return false; } if(! silent) { token = state.push('link_open'.'a'.1); token.attrs = [ [ 'href', fullUrl ] ]; token.markup = 'autolink'; token.info = 'auto'; token = state.push('text'.' '.0); token.content = state.md.normalizeLinkText(url); token = state.push('link_close'.'a'.- 1); token.markup = 'autolink'; token.info = 'auto'; } state.pos += linkMatch[0].length; return true; } if (EMAIL_RE.test(tail)) { emailMatch = tail.match(EMAIL_RE); url = emailMatch[0].slice(1.- 1); fullUrl = state.md.normalizeLink('mailto:' + url); if(! state.md.validateLink(fullUrl)) {return false; } if(! silent) { token = state.push('link_open'.'a'.1); token.attrs = [ [ 'href', fullUrl ] ]; token.markup = 'autolink'; token.info = 'auto'; token = state.push('text'.' '.0); token.content = state.md.normalizeLinkText(url); token = state.push('link_close'.'a'.- 1); token.markup = 'autolink'; token.info = 'auto'; } state.pos += emailMatch[0].length; return true; } return false; }; Copy the code
As you can see, autolink parses the URL between < and >.
Markdown syntax:
<https://example.com>.
-
html_inline.js
module.exports = function html_inline(state, silent) { var ch, match, max, token, pos = state.pos; if(! state.md.options.html) {return false; } // Check start max = state.posMax; if(state.src.charCodeAt(pos) ! = =0x3C/ * (* / || pos + 2 >= max) { return false; } // Quick fail on second char ch = state.src.charCodeAt(pos + 1); if(ch ! = =0x21/ *! * /&& ch ! = =0x3F/ *? * /&& ch ! = =0x2F/ * / * / && !isLetter(ch)) { return false; } match = state.src.slice(pos).match(HTML_TAG_RE); if(! match) {return false; } if(! silent) { token = state.push('html_inline'.' '.0); token.content = state.src.slice(pos, pos + match[0].length); } state.pos += match[0].length; return true; }; Copy the code
Parses inline HTML tags.
Markdown syntax: <span>inline HTML</span>.
-
entity.js
module.exports = function entity(state, silent) { var ch, code, match, pos = state.pos, max = state.posMax; if(state.src.charCodeAt(pos) ! = =0x26/ * and * /) { return false; } if (pos + 1 < max) { ch = state.src.charCodeAt(pos + 1); if (ch === 0x23 / * # * /) { match = state.src.slice(pos).match(DIGITAL_RE); if (match) { if(! silent) { code = match[1] [0].toLowerCase() === 'x' ? parseInt(match[1].slice(1), 16) : parseInt(match[1].10); state.pending += isValidEntityCode(code) ? fromCodePoint(code) : fromCodePoint(0xFFFD); } state.pos += match[0].length; return true; }}else { match = state.src.slice(pos).match(NAMED_RE); if (match) { if (has(entities, match[1]) {if(! silent) { state.pending += entities[match[1]]; } state.pos += match[0].length; return true; }}}}if(! silent) { state.pending +='&'; } state.pos++; return true; }; Copy the code
Parses HTML entities, such as &nbsp;, &quot;, &apos;, and so on.
That is the whole process of ParserInline.prototype.tokenize: after a token of type inline has been processed by all the rules of ruler, different tokens are generated and stored on that token's children property. But ParserInline.prototype.parse is not finished yet; the state still has to pass through all the rules of ruler2. They are balance_pairs.js, strikethrough.postProcess, emphasis.postProcess, and text_collapse.js.
-
balance_pairs.js
module.exports = function link_pairs(state) { var i, j, lastDelim, currDelim, delimiters = state.delimiters, max = state.delimiters.length; for (i = 0; i < max; i++) { lastDelim = delimiters[i]; if(! lastDelim.close) {continue; } j = i - lastDelim.jump - 1; while (j >= 0) { currDelim = delimiters[j]; if (currDelim.open && currDelim.marker === lastDelim.marker && currDelim.end < 0 && currDelim.level === lastDelim.level) { // typeofs are for backward compatibility with plugins var odd_match = (currDelim.close || lastDelim.open) && typeofcurrDelim.length ! = ='undefined' && typeoflastDelim.length ! = ='undefined' && (currDelim.length + lastDelim.length) % 3= = =0; if(! odd_match) { lastDelim.jump = i - j; lastDelim.open =false; currDelim.end = i; currDelim.jump = 0; break; } } j -= currDelim.jump + 1; }}};Copy the code
It processes the delimiters array, mainly to find matching opening and closing markers such as *, ~, etc.
-
strikethrough.postProcess
Located in lib/rules_inline/strikethrough.js; it processes ~ characters and generates tokens for <s> tags.
-
emphasis.postProcess
Located in lib/rules_inline/emphasis.js; it processes * or _ characters and generates tokens for <strong> or <em> tags.
-
text_collapse.js
module.exports = function text_collapse(state) { var curr, last, level = 0, tokens = state.tokens, max = state.tokens.length; for (curr = last = 0; curr < max; curr++) { // re-calculate levels level += tokens[curr].nesting; tokens[curr].level = level; if (tokens[curr].type === 'text' && curr + 1 < max && tokens[curr + 1].type === 'text') { // collapse two adjacent text nodes tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content; } else { if (curr !== last) { tokens[last] = tokens[curr]; } last++; } } if (curr !== last) { tokens.length = last; } }; Copy the code
Its job is to merge adjacent text nodes. For example:
const src = '12_';
md.parse(src);
// state.tokens before text_collapse:
// [
//   { content: "12", tag: "", type: "text", ... },
//   { content: "_", tag: "", type: "text", ... }
// ]
// state.tokens after text_collapse:
// [
//   { content: "12_", tag: "", type: "text" }
// ]
At this point, ParserInline is done. If you set a breakpoint and debug, you will find that after ParserInline.prototype.parse has run, the children property of the token of type inline contains a number of tokens. These child tokens are the result of ParserInline. After ParserInline come the rule functions linkify, replacements, smartquotes, etc.; the details can be found in the ParserCore article. Finally, we return to the parse part of MarkdownIt:
/**
 * Parses `src` into tokens and renders them with `this.renderer`.
 *
 * @param {string} src - source text passed to `this.parse`.
 * @param {Object} [env] - environment object; an empty object is used when
 *   a falsy value is given.
 * @returns {*} whatever `this.renderer.render` returns.
 */
MarkdownIt.prototype.render = function (src, env) {
  var sandbox = env || {};
  var parsedTokens = this.parse(src, sandbox);

  return this.renderer.render(parsedTokens, this.options, sandbox);
};
Copy the code
Once this.parse returns, all tokens are ready and it’s time to start the renderer!
Conclusion
Let’s start with a flow chart to get an overview of the parse process.
Calling this.parse generates all the tokens. The tokens are then passed to this.renderer.render, which renders the HTML string. Next we’ll look at render’s logic.