Author: Ji Zhi

ParserInline

As we saw in ParserCore, after ParserCore processing, a token of type inline is generated. The next step is to hand it to ParserInline. The rule function looks like this:

module.exports = function inline(state) {
  var tokens = state.tokens, tok, i, l;

  // Parse inlines
  for (i = 0, l = tokens.length; i < l; i++) {
    tok = tokens[i];
    if (tok.type === 'inline') { state.md.inline.parse(tok.content, state.md, state.env, tok.children); }}};Copy the code

When a token's type is inline, the parse method of ParserInline is called. ParserInline is located in lib/parser_inline.js.

// [ name, rule function ] pairs registered on `ruler`, in this order.
var _rules = [
  [ 'text',            require('./rules_inline/text') ],
  [ 'newline',         require('./rules_inline/newline') ],
  [ 'escape',          require('./rules_inline/escape') ],
  [ 'backticks',       require('./rules_inline/backticks') ],
  [ 'strikethrough',   require('./rules_inline/strikethrough').tokenize ],
  [ 'emphasis',        require('./rules_inline/emphasis').tokenize ],
  [ 'link',            require('./rules_inline/link') ],
  [ 'image',           require('./rules_inline/image') ],
  [ 'autolink',        require('./rules_inline/autolink') ],
  [ 'html_inline',     require('./rules_inline/html_inline') ],
  [ 'entity',          require('./rules_inline/entity') ]
];

// [ name, rule function ] pairs registered on `ruler2`, in this order.
var _rules2 = [
  [ 'balance_pairs',   require('./rules_inline/balance_pairs') ],
  [ 'strikethrough',   require('./rules_inline/strikethrough').postProcess ],
  [ 'emphasis',        require('./rules_inline/emphasis').postProcess ],
  [ 'text_collapse',   require('./rules_inline/text_collapse') ]
];

// Constructor: creates two Ruler instances and fills them from the tables
// above (`_rules` into `this.ruler`, `_rules2` into `this.ruler2`).
function ParserInline() {
  var i;

  this.ruler = new Ruler();

  for (i = 0; i < _rules.length; i++) {
    this.ruler.push(_rules[i][0], _rules[i][1]);
  }

  this.ruler2 = new Ruler();

  for (i = 0; i < _rules2.length; i++) {
    this.ruler2.push(_rules2[i][0], _rules2[i][1]);
  }
}

From the constructor, we can see that ParserInline, unlike ParserBlock, has two Ruler instances. ruler is used inside tokenize, and ruler2 is used after tokenize has finished.

// Scans state.src from state.pos up to state.posMax and generates tokens.
// At each position the rules of `this.ruler` are tried in order until one
// returns true; if none matches, the current character is appended to
// state.pending and the position advances by one.
ParserInline.prototype.tokenize = function (state) {
  var ok, i,
      rules = this.ruler.getRules(''),
      len = rules.length,
      end = state.posMax,
      maxNesting = state.md.options.maxNesting;

  while (state.pos < end) {
    // Rules are only tried while the nesting level is below the limit.
    if (state.level < maxNesting) {
      for (i = 0; i < len; i++) {
        ok = rules[i](state, false);
        if (ok) { break; }
      }
    }

    if (ok) {
      if (state.pos >= end) { break; }
      continue;
    }

    state.pending += state.src[state.pos++];
  }

  // Flush whatever text is still buffered.
  if (state.pending) {
    state.pushPending();
  }
};

// Entry point of the inline parser: builds a state object for `str`,
// tokenizes it, then runs every rule of `this.ruler2` over the state.
// Tokens are written into `outTokens`; nothing is returned.
ParserInline.prototype.parse = function (str, md, env, outTokens) {
  var i, rules, len;
  var state = new this.State(str, md, env, outTokens);

  this.tokenize(state);

  rules = this.ruler2.getRules('');
  len = rules.length;

  for (i = 0; i < len; i++) {
    rules[i](state);
  }
};

Passing the content of a token of type inline to md.inline.parse takes us into the parse function, where the state for ParserInline is created. Remember the state objects of ParserCore and ParserBlock? Each one stores the state information its parser needs while parsing.

Let’s start with the State class, which is located in lib/rules_inline/state_inline.js.

// State object for one inline parsing run.
//   src       - the string to parse
//   md        - parser instance, stored as-is
//   env       - environment object, stored as-is
//   outTokens - array that receives the generated tokens
function StateInline(src, md, env, outTokens) {
  this.src = src;
  this.env = env;
  this.md = md;
  this.tokens = outTokens;

  this.pos = 0;                      // current index into src
  this.posMax = this.src.length;     // end index (exclusive)
  this.level = 0;
  this.pending = '';                 // text buffered but not yet emitted as a token
  this.pendingLevel = 0;

  this.cache = {};

  this.delimiters = [];
}

List some useful fields:

  1. pos

The string index of the current token’s content

  1. posMax

Maximum index of the current token’s content

  1. pending

Accumulates plain text that has not yet been emitted as a token, for example:

let src = "**emphasis**";
let state = new StateInline(src);

// While this source is being tokenized, the plain text between the markers
// is buffered: state.pending === 'emphasis'
  1. delimiters

Store delimiters for special tokens, such as *, ~, etc. The element format is as follows:

{
  close: false,
  end: -1,
  jump: 0,
  length: 2,
  level: 0,
  marker: 42,
  open: true,
  token: 0
}
// marker is the character code of the delimiter character (42 is '*')

After state is generated, the tokenize method is called.

// Scans state.src from state.pos up to state.posMax and generates tokens.
// At each position the rules of `this.ruler` are tried in order until one
// returns true; if none matches, the current character is appended to
// state.pending and the position advances by one.
ParserInline.prototype.tokenize = function (state) {
  var ok, i,
      rules = this.ruler.getRules(''),
      len = rules.length,
      end = state.posMax,
      maxNesting = state.md.options.maxNesting;

  while (state.pos < end) {
    // Rules are only tried while the nesting level is below the limit.
    if (state.level < maxNesting) {
      for (i = 0; i < len; i++) {
        ok = rules[i](state, false);
        if (ok) { break; }
      }
    }

    if (ok) {
      if (state.pos >= end) { break; }
      continue;
    }

    state.pending += state.src[state.pos++];
  }

  // Flush whatever text is still buffered.
  if (state.pending) {
    state.pushPending();
  }
};

First, get the default rule chain, then scan the token's content field from the first character to the end. At each position the ruler's rule functions are called in turn until one of them matches. The rules live in the lib/rules_inline/ folder. They are invoked in the following order:

  • text.js

    module.exports = function text(state, silent) {
      var pos = state.pos;
    
      while(pos < state.posMax && ! isTerminatorChar(state.src.charCodeAt(pos))) { pos++; }if (pos === state.pos) { return false; }
    
      if(! silent) { state.pending += state.src.slice(state.pos, pos); } state.pos = pos;return true;
    };
    Copy the code

    Consumes a run of consecutive characters that are not terminator characters. isTerminatorChar defines the terminator characters as follows:

    // Returns true when the character code `ch` is one of the characters
    // at which the text rule stops consuming, false otherwise.
    function isTerminatorChar(ch) {
      switch (ch) {
        case 0x0A/* \n */:
        case 0x21/* ! */:
        case 0x23/* # */:
        case 0x24/* $ */:
        case 0x25/* % */:
        case 0x26/* & */:
        case 0x2A/* * */:
        case 0x2B/* + */:
        case 0x2D/* - */:
        case 0x3A/* : */:
        case 0x3C/* < */:
        case 0x3D/* = */:
        case 0x3E/* > */:
        case 0x40/* @ */:
        case 0x5B/* [ */:
        case 0x5C/* \ */:
        case 0x5D/* ] */:
        case 0x5E/* ^ */:
        case 0x5F/* _ */:
        case 0x60/* ` */:
        case 0x7B/* { */:
        case 0x7D/* } */:
        case 0x7E/* ~ */:
          return true;
        default:
          return false;
      }
    }

    If the input is "__ad__", this rule extracts the string "ad".

  • newline.js

    module.exports = function newline(state, silent) {
      var pmax, max, pos = state.pos;
    
      if(state.src.charCodeAt(pos) ! = =0x0A/* \n */) { return false; }
    
      pmax = state.pending.length - 1;
      max = state.posMax;
    
      if(! silent) {if (pmax >= 0 && state.pending.charCodeAt(pmax) === 0x20) {
          if (pmax >= 1 && state.pending.charCodeAt(pmax - 1) = = =0x20) {
            state.pending = state.pending.replace(/ + $/.' ');
            state.push('hardbreak'.'br'.0);
          } else {
            state.pending = state.pending.slice(0.- 1);
            state.push('softbreak'.'br'.0); }}else {
          state.push('softbreak'.'br'.0);
        }
      }
    
      pos++;
    
      while (pos < max && isSpace(state.src.charCodeAt(pos))) { pos++; }
    
      state.pos = pos;
      return true;
    };
    Copy the code

    Handles newline characters (\n).

  • escape.js

    module.exports = function escape(state, silent) {
      var ch, pos = state.pos, max = state.posMax;
    
      if(state.src.charCodeAt(pos) ! = =0x5C/ * \ * /) { return false; }
    
      pos++;
    
      if (pos < max) {
        ch = state.src.charCodeAt(pos);
    
        if (ch < 256&& ESCAPED[ch] ! = =0) {
          if(! silent) { state.pending += state.src[pos]; } state.pos +=2;
          return true;
        }
    
        if (ch === 0x0A) {
          if(! silent) { state.push('hardbreak'.'br'.0);
          }
    
          pos++;
          // skip leading whitespaces from next line
          while (pos < max) {
            ch = state.src.charCodeAt(pos);
            if(! isSpace(ch)) {break; }
            pos++;
          }
    
          state.pos = pos;
          return true; }}if(! silent) { state.pending +='\ \'; }
      state.pos++;
      return true;
    };
    Copy the code

    Handle escape characters (\).

  • backticks.js

    module.exports = function backtick(state, silent) {
      var start, max, marker, matchStart, matchEnd, token,
          pos = state.pos,
          ch = state.src.charCodeAt(pos);
    
      if(ch ! = =0x60/ * ` * /) { return false; }
    
      start = pos;
      pos++;
      max = state.posMax;
    
      while (pos < max && state.src.charCodeAt(pos) === 0x60/ * ` * /) { pos++; }
    
      marker = state.src.slice(start, pos);
    
      matchStart = matchEnd = pos;
    
      while ((matchStart = state.src.indexOf('`, matchEnd)) ! = =- 1) {
        matchEnd = matchStart + 1;
    
        while (matchEnd < max && state.src.charCodeAt(matchEnd) === 0x60/ * ` * /) { matchEnd++; }
    
        if (matchEnd - matchStart === marker.length) {
          if(! silent) { token = state.push('code_inline'.'code'.0);
            token.markup  = marker;
            token.content = state.src.slice(pos, matchStart)
                                    .replace(/[ \n]+/g.' ')
                                    .trim();
          }
          state.pos = matchEnd;
          return true; }}if(! silent) { state.pending += marker; } state.pos += marker.length;return true;
    };
    
    Copy the code

    Handles the backtick character (`).

    Markdown syntax: `this is inline code`.

  • strikethrough.js

    The code is too long to paste here; this rule handles the strikethrough marker (~).

    Markdown syntax: ~~strike~~.

  • emphasis.js

    Handles the emphasis markers (* or _).

    Markdown syntax: **strong**.

  • link.js

    The function is to parse hyperlinks.

    Markdown syntax: [text](href).

  • image.js

    The purpose is to parse pictures.

    Markdown syntax: ![image](<src> "title").

  • autolink.js

    module.exports = function autolink(state, silent) {
      var tail, linkMatch, emailMatch, url, fullUrl, token,
          pos = state.pos;
    
      if(state.src.charCodeAt(pos) ! = =0x3C/ * (* /) { return false; }
    
      tail = state.src.slice(pos);
    
      if (tail.indexOf('>') < 0) { return false; }
    
      if (AUTOLINK_RE.test(tail)) {
        linkMatch = tail.match(AUTOLINK_RE);
    
        url = linkMatch[0].slice(1.- 1);
        fullUrl = state.md.normalizeLink(url);
        if(! state.md.validateLink(fullUrl)) {return false; }
    
        if(! silent) { token = state.push('link_open'.'a'.1);
          token.attrs   = [ [ 'href', fullUrl ] ];
          token.markup  = 'autolink';
          token.info    = 'auto';
    
          token         = state.push('text'.' '.0);
          token.content = state.md.normalizeLinkText(url);
    
          token         = state.push('link_close'.'a'.- 1);
          token.markup  = 'autolink';
          token.info    = 'auto';
        }
    
        state.pos += linkMatch[0].length;
        return true;
      }
    
      if (EMAIL_RE.test(tail)) {
        emailMatch = tail.match(EMAIL_RE);
    
        url = emailMatch[0].slice(1.- 1);
        fullUrl = state.md.normalizeLink('mailto:' + url);
        if(! state.md.validateLink(fullUrl)) {return false; }
    
        if(! silent) { token = state.push('link_open'.'a'.1);
          token.attrs   = [ [ 'href', fullUrl ] ];
          token.markup  = 'autolink';
          token.info    = 'auto';
    
          token         = state.push('text'.' '.0);
          token.content = state.md.normalizeLinkText(url);
    
          token         = state.push('link_close'.'a'.- 1);
          token.markup  = 'autolink';
          token.info    = 'auto';
        }
    
        state.pos += emailMatch[0].length;
        return true;
      }
    
      return false;
    };
    Copy the code

    As you can see, autolink parses the URL or email address between < and >.

    Markdown syntax: <https://example.com>.

  • html_inline.js

    module.exports = function html_inline(state, silent) {
      var ch, match, max, token,
          pos = state.pos;
    
      if(! state.md.options.html) {return false; }
    
      // Check start
      max = state.posMax;
      if(state.src.charCodeAt(pos) ! = =0x3C/ * (* / ||
          pos + 2 >= max) {
        return false;
      }
    
      // Quick fail on second char
      ch = state.src.charCodeAt(pos + 1);
      if(ch ! = =0x21/ *! * /&& ch ! = =0x3F/ *? * /&& ch ! = =0x2F/ * / * / &&
          !isLetter(ch)) {
        return false;
      }
    
      match = state.src.slice(pos).match(HTML_TAG_RE);
      if(! match) {return false; }
    
      if(! silent) { token = state.push('html_inline'.' '.0);
      token.content = state.src.slice(pos, pos + match[0].length);
    }
    state.pos += match[0].length;
    return true;
    };
    Copy the code

    Parse HTML inline tags.

    Markdown syntax: <span>inline HTML</span>.

  • entity.js

    module.exports = function entity(state, silent) {
      var ch, code, match, pos = state.pos, max = state.posMax;
    
      if(state.src.charCodeAt(pos) ! = =0x26/ * and * /) { return false; }
    
      if (pos + 1 < max) {
        ch = state.src.charCodeAt(pos + 1);
    
        if (ch === 0x23 / * # * /) {
          match = state.src.slice(pos).match(DIGITAL_RE);
          if (match) {
            if(! silent) { code = match[1] [0].toLowerCase() === 'x' ? parseInt(match[1].slice(1), 16) : parseInt(match[1].10);
              state.pending += isValidEntityCode(code) ? fromCodePoint(code) : fromCodePoint(0xFFFD);
            }
            state.pos += match[0].length;
            return true; }}else {
          match = state.src.slice(pos).match(NAMED_RE);
          if (match) {
            if (has(entities, match[1]) {if(! silent) { state.pending += entities[match[1]]; }
              state.pos += match[0].length;
              return true; }}}}if(! silent) { state.pending +='&'; }
      state.pos++;
      return true;
    };
    Copy the code

    Parses HTML entities, such as &nbsp;, &quot;, &apos;, and so on.

That is the whole flow of ParserInline.prototype.tokenize: the token of type inline is processed by every rule in ruler, and the tokens generated along the way are stored in the children property of that inline token. But ParserInline.prototype.parse is not finished yet; it still has to run every rule in ruler2. These are balance_pairs.js, strikethrough.postProcess, emphasis.postProcess, and text_collapse.js.

  • balance_pairs.js

    module.exports = function link_pairs(state) {
      var i, j, lastDelim, currDelim,
          delimiters = state.delimiters,
          max = state.delimiters.length;
    
      for (i = 0; i < max; i++) {
        lastDelim = delimiters[i];
    
        if(! lastDelim.close) {continue; }
    
        j = i - lastDelim.jump - 1;
    
        while (j >= 0) {
          currDelim = delimiters[j];
    
          if (currDelim.open &&
              currDelim.marker === lastDelim.marker &&
              currDelim.end < 0 &&
              currDelim.level === lastDelim.level) {
    
            // typeofs are for backward compatibility with plugins
            var odd_match = (currDelim.close || lastDelim.open) &&
                            typeofcurrDelim.length ! = ='undefined' &&
                            typeoflastDelim.length ! = ='undefined' &&
                            (currDelim.length + lastDelim.length) % 3= = =0;
    
            if(! odd_match) { lastDelim.jump = i - j; lastDelim.open =false;
              currDelim.end  = i;
              currDelim.jump = 0;
              break;
            }
          }
    
          j -= currDelim.jump + 1; }}};Copy the code

    Walks the delimiters array to pair up matching opening and closing delimiters such as * and ~.

  • strikethrough.postProcess

    Located in lib/rules_inline/strikethrough.js; it processes ~ characters and generates tokens for <s> tags.

  • emphasis.postProcess

    Located in lib/rules_inline/emphasis.js; it processes * or _ characters and generates tokens for <strong> or <em> tags.

  • text_collapse.js

    module.exports = function text_collapse(state) {
      var curr, last,
          level = 0,
          tokens = state.tokens,
          max = state.tokens.length;
    
      for (curr = last = 0; curr < max; curr++) {
        // re-calculate levels
        level += tokens[curr].nesting;
        tokens[curr].level = level;
    
        if (tokens[curr].type === 'text' &&
            curr + 1 < max &&
            tokens[curr + 1].type === 'text') {
    
          // collapse two adjacent text nodes
          tokens[curr + 1].content = tokens[curr].content + tokens[curr + 1].content;
        } else {
          if (curr !== last) { tokens[last] = tokens[curr]; }
    
          last++;
        }
      }
    
      if (curr !== last) {
        tokens.length = last;
      }
    };
    Copy the code

    This rule merges adjacent text tokens. For example:

    const src = '12_';

    md.parse(src);
    // state.tokens before text_collapse:
    // [
    //   { content: "12", tag: "", type: "text" },
    //   { content: "_", tag: "", type: "text", ... }
    // ]
    //
    // state.tokens after text_collapse:
    // [
    //   { content: "12_", tag: "", type: "text" }
    // ]

At this point, ParserInline is done. If you step through with a debugger, you will find that after ParserInline.prototype.parse the children property of each token of type inline holds a number of tokens. These child tokens are the output of ParserInline. After ParserInline, the remaining core rule functions run: linkify, replacements, smartquotes, and so on. The details can be found in ParserCore. Finally, we return to the parse call inside MarkdownIt's render method:

// Parses `src` into tokens with this.parse and passes them, together with
// this.options and `env`, to this.renderer.render, returning its result.
// A falsy `env` is replaced by a fresh empty object.
MarkdownIt.prototype.render = function (src, env) {
  env = env || {};

  return this.renderer.render(this.parse(src, env), this.options, env);
};

The this.parse function completes to indicate that all tokens are ready and it’s time to start the renderer!

Conclusion

Let’s start with a flow chart to get an overview of the parse process.

After this.parse is called, all tokens have been generated. The tokens are then passed to this.renderer.render, which renders the HTML string. Next we’ll look at the renderer’s logic.