Author: Ji Zhi

markdown-it-emoji

The plugin extends markdown-it so that it recognizes emoji in Markdown files. The common emoji syntax is `:name:`. Names are usually English words or numbers, and some shortcuts are also supported. For example:

:100: => 💯
:stuck_out_tongue: => 😛
// shortcuts
:D => 😄
Copy the code

The logic for registering plug-ins is as follows:

// Create a markdown-it instance and load the emoji plugin.
var md = require('markdown-it')();
var emoji = require('markdown-it-emoji');

// Register the plugin; the trailing `options` argument is optional.
md.use(emoji /*, options */);
Copy the code

The logic of MarkdownIt’s `use` is simple: the first argument passed to `use` is a plugin function. It is called with the MarkdownIt instance, followed by every argument that was passed to `use` from the second one onward.

/**
 * Apply a plugin to this MarkdownIt instance.
 *
 * The plugin is invoked with this instance as its first argument, followed by
 * every extra argument that was passed to `use` after `plugin`.
 *
 * @param {Function} plugin - plugin function, called as plugin(md, ...params)
 * @returns {MarkdownIt} this instance, so that calls can be chained
 */
MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
  // Prepend the instance to everything that followed `plugin` in the call.
  var args = [ this ].concat(Array.prototype.slice.call(arguments, 1));
  plugin.apply(plugin, args);
  return this;
};
Copy the code

We’re more interested in the markdown-it-emoji function, which is located in markdown-it-emoji/index.js.

var emojies_defs      = require('./lib/data/full.json');
var emojies_shortcuts = require('./lib/data/shortcuts');
var emoji_html        = require('./lib/render');
var emoji_replace     = require('./lib/replace');
var normalize_opts    = require('./lib/normalize_opts');


module.exports = function emoji_plugin(md, options) {

  / / step one
  var defaults = {
    defs: emojies_defs,
    shortcuts: emojies_shortcuts,
    enabled: []};/ / in step 2
  var opts = normalize_opts(md.utils.assign({}, defaults, options || {}));

  / / step 3
  md.renderer.rules.emoji = emoji_html;

  / / step 4
  md.core.ruler.push('emoji', emoji_replace(md, opts.defs, opts.shortcuts, opts.scanRE, opts.replaceRE));
};
Copy the code

The emoji_plugin function looks very simple. It takes two parameters, md and options; options is merged (via assign) over the built-in defaults. Following the order of execution, we can roughly divide the function into four steps.

  1. defaults

    // `defs` maps each emoji name to its emoji character.
    
    var defs = {
      "100": "💯",
      "1234": "🔢",
      "grinning": "😀",
      "smiley": "😃",
      "smile": "😄",
      "grin": "😁",
      "laughing": "😆"
      // ... the full mapping lives in 'lib/data/full.json'
    };
    
    // `shortcuts` maps an emoji name to its short aliases.
    // For example, 😄 can be written as ":smile:" or ":D".
    
    var shortcuts = {
      angry:            [ '>:(', '>:-(' ],
      blush:            [ ':")', ':-")' ],
      broken_heart:     [ '</3', '<\\3' ]
      // ... the full configuration lives in 'lib/data/shortcuts.js'
    };
    
    // `enabled` is a whitelist of emoji names. When it is non-empty, only the
    // listed emojis are recognized; all other default emojis are switched off.
    var enabled = [];
    Copy the code
  2. normalize_opts

    module.exports = function normalize_opts(options) {
      var emojies = options.defs,
          shortcuts;
    
      // Filter emojies by whitelist, if needed
      if (options.enabled.length) {
        emojies = Object.keys(emojies).reduce(function (acc, key) {
          if (options.enabled.indexOf(key) >= 0) {
            acc[key] = emojies[key];
          }
          return acc;
        }, {});
      }
    
      // Flatten shortcuts to simple object: { alias: emoji_name }
      shortcuts = Object.keys(options.shortcuts).reduce(function (acc, key) {
        // Skip aliases for filtered emojies, to reduce regexp
        if(! emojies[key]) {return acc; }
    
        if (Array.isArray(options.shortcuts[key])) {
          options.shortcuts[key].forEach(function (alias) {
            acc[alias] = key;
          });
          return acc;
        }
    
        acc[options.shortcuts[key]] = key;
        return acc;
      }, {});
    
      // Compile regexp
      var names = Object.keys(emojies)
                    .map(function (name) { return ':' + name + ':'; })
                    .concat(Object.keys(shortcuts))
                    .sort()
                    .reverse()
                    .map(function (name) { return quoteRE(name); })
                    .join('|');
      var scanRE = RegExp(names);
      var replaceRE = RegExp(names, 'g');
    
      return {
        defs: emojies,
        shortcuts: shortcuts,
        scanRE: scanRE,
        replaceRE: replaceRE
      };
    };
    Copy the code

    The function logic is very simple: it processes the options supplied by the user. It first applies the `enabled` whitelist, then flattens the shortcuts into an alias-to-name map, and finally builds scanRE and replaceRE, the regular expressions used to recognize emoji syntax. The pattern is a `|`-separated alternation that covers every emoji syntax defined in full.json and shortcuts.js that survives the whitelist.

  3. Add render emoji rule

    
    md.renderer.rules.emoji = emoji_html;
    
    module.exports = function emoji_html(tokens, idx /*, options, env */) {
      return tokens[idx].content;
    };
    Copy the code

    The rendering rule is invoked from MarkdownIt.renderer.render, that is, after all the parsers have produced their tokens of different types and rendering of the output begins. The emoji_html function simply returns the content of the token, which at this point is already the emoji.

  4. ParserCore adds an emoji rule

    module.exports = function create_rule(md, emojies, shortcuts, scanRE, replaceRE) {
      var arrayReplaceAt = md.utils.arrayReplaceAt,
          ucm = md.utils.lib.ucmicro,
          ZPCc = new RegExp([ ucm.Z.source, ucm.P.source, ucm.Cc.source ].join('|'));
    
      function splitTextToken(text, level, Token) {... }return function emoji_replace(state) {
        var i, j, l, tokens, token,
            blockTokens = state.tokens,
            autolinkLevel = 0;
    
        for (j = 0, l = blockTokens.length; j < l; j++) {
          if(blockTokens[j].type ! = ='inline') { continue; }
          tokens = blockTokens[j].children;
    
          // We scan from the end, to keep position when new tags added.
          // Use reversed logic in links start/end match
          for (i = tokens.length - 1; i >= 0; i--) {
            token = tokens[i];
    
            if (token.type === 'link_open' || token.type === 'link_close') {
              if (token.info === 'auto') { autolinkLevel -= token.nesting; }}if (token.type === 'text' && autolinkLevel === 0 && scanRE.test(token.content)) {
              // replace current nodeblockTokens[j].children = tokens = arrayReplaceAt( tokens, i, splitTextToken(token.content, token.level, state.Token) );  }}}}; };Copy the code

    Step 4 actually executes before step 3, because step 4’s rule is called in ParserCore.parse, while step 3’s rule is called during render.

    The emoji_replace logic is clear. By the time it runs in ParserCore, the block parsing stage has already generated tokens of type inline. The emoji_replace function first skips every token whose type is not inline. It then takes each inline token’s children and scans them from back to front. Whenever the condition below is matched, tokens of type emoji are generated and inserted into the children by arrayReplaceAt. Finally, at render time, md.renderer.rules.emoji from step 3 outputs the corresponding emoji.

    // Only plain text outside of autolinks that contains emoji syntax is rewritten.
    if (token.type === 'text' && autolinkLevel === 0 && scanRE.test(token.content)) {
      // replace current node
      blockTokens[j].children = tokens = arrayReplaceAt(
        tokens, i, splitTextToken(token.content, token.level, state.Token)
      );
    }

    Let’s take a look at how splitTextToken handles token.content and finally generates an emoji token.

    /**
     * Split a text string into a list of `text` and `emoji` tokens.
     *
     * Relies on `replaceRE`, `shortcuts`, `emojies` and `ZPCc` from the
     * enclosing scope.
     *
     * @param {string} text - content of the text token being split
     * @param {number} level - accepted for the caller's convenience; not used here
     * @param {Function} Token - token constructor, called as new Token(type, tag, nesting)
     * @returns {Object[]} tokens covering `text` in order; each recognized emoji
     *   becomes an `emoji` token (markup = emoji name, content = emoji character)
     */
    function splitTextToken(text, level, Token) {
      var token, last_pos = 0, nodes = [];
    
      // `replace` is used only to walk over every match; its result is discarded.
      text.replace(replaceRE, function (match, offset, src) {
        var emoji_name;
        if (shortcuts.hasOwnProperty(match)) {
          // The match is a shortcut: map it to the full emoji name.
          emoji_name = shortcuts[match];
    
          // Ignore the shortcut when the character right before it
          // does not match ZPCc.
          if (offset > 0 && !ZPCc.test(src[offset - 1])) {
            return;
          }
    
          // Ignore the shortcut when the character right after it
          // does not match ZPCc.
          if (offset + match.length < src.length && !ZPCc.test(src[offset + match.length])) {
            return;
          }
        } else {
          // The match has the form ":name:"; strip the surrounding colons.
          emoji_name = match.slice(1, -1);
        }
    
        // Emit the plain text between the previous match and this one.
        if (offset > last_pos) {
          token         = new Token('text', '', 0);
          token.content = text.slice(last_pos, offset);
          nodes.push(token);
        }
    
        token         = new Token('emoji', '', 0);
        token.markup  = emoji_name;
        token.content = emojies[emoji_name];
        nodes.push(token);
    
        last_pos = offset + match.length;
      });
    
      // Emit whatever text remains after the last match.
      if (last_pos < text.length) {
        token         = new Token('text', '', 0);
        token.content = text.slice(last_pos);
        nodes.push(token);
      }
    
      return nodes;
    }
    Copy the code

    The first argument, text, is token.content: a string that contains emoji syntax that has not yet been turned into an emoji, such as “:smile:”. The function calls text.replace with replaceRE, the regular expression built from the emoji syntax in lib/data/full.json and lib/data/shortcuts.js, and converts each substring of text that matches an emoji syntax into an emoji, one match at a time. Here’s an example:

    const text = ":D,:100:,:-1:"
    
    // After splitTextToken processes this text, the output is 😄,💯,👎
    Copy the code

summary

After processing by markdown-it-emoji plug-in, the emoji syntax in the final MD file will be recognized and rendered as emoji.

From the point of view of this plug-in, MarkdownIt is very scalable. You can always tap into Tokens at different stages, and you can even change the Render Rule to customize your needs.