Author: Xiao Lei
Personal homepage: Github
Recently, I have been writing some tools in Node.js, but I did not know much about how Node.js loads modules or about the details of the module specification it follows. This article is a summary of what I learned from reading the Node.js source code and some related articles:
Before the ES2015 standard, JS did not have a mature module system specification. Node.js addresses this shortcoming by adopting the module specification defined in the CommonJS specification, which includes:
1.require
Require is a function that takes a module identifier and refers to apis exposed by other modules.
2.module context
Module context specifies that inside a module there is a require variable, which complies with the definition of the require function above; an exports object, to which the module adds properties for every API it needs to expose; and a module object.
3.module Identifiers
Module identifiers define the arguments that the require function accepts: for example, they must be strings made up of lower camelCase terms, they may omit the file suffix, and a leading "." or ".." indicates that the path is relative, and so on.
For details about the Module specification defined in commonJS, see the Wiki documentation
In our Node.js program, we use the require method that looks global (we’ll explain why it looks global later) to load other modules.
const util = require('./util')
Copy the code
First let’s take a look at how node.js defines this method internally:
/**
 * Loads the module identified by `path` on behalf of this module.
 *
 * @param {string} path - Module identifier, as passed to `require()`.
 * @returns {*} Whatever `Module._load` returns for the request.
 * @throws {AssertionError} If `path` is missing or is not a string.
 */
Module.prototype.require = function(path) {
  assert(path, 'missing path');
  assert(typeof path === 'string', 'path must be a string');
  // The real work is delegated to Module._load; a module pulled in through
  // require() is never treated as the main module.
  return Module._load(path, this, /* isMain */ false);
};
/**
 * Resolves `request` to a file name, serves it from the module cache when
 * possible, and otherwise creates, caches and loads a new module.
 *
 * @param {string} request - Module identifier, as passed to `require()`.
 * @param {Module} parent - Module that issued the request.
 * @param {boolean} isMain - Whether the module is the program entry point.
 * @returns {*} The `exports` of the requested module.
 */
Module._load = function(request, parent, isMain) {
  // ... (code elided in this excerpt)

  // Get the file name; it is also used as the cache key below.
  var filename = Module._resolveFilename(request, parent, isMain);

  // Module._cache holds the modules that have already been created.
  var cachedModule = Module._cache[filename];
  if (cachedModule) {
    updateChildren(parent, cachedModule, true);
    return cachedModule.exports;
  }

  // Native modules are handed over to NativeModule.require.
  if (NativeModule.nonInternalExists(filename)) {
    debug('load native module %s', request);
    return NativeModule.require(filename);
  }

  // Don't call updateChildren(), Module constructor already does.
  // Initialize a new module.
  var module = new Module(filename, parent);

  if (isMain) {
    process.mainModule = module;
    module.id = '.';
  }

  // Cache the module before loading it, so that a circular require() made
  // while it is still loading gets this same module object back from the
  // cache instead of loading the file again.
  Module._cache[filename] = module;

  // Load the module.
  tryModuleLoad(module, filename);

  // module.exports holds the exported contents.
  return module.exports;
};
Copy the code
Module._load is an internal method that:
- Find the module path based on the string you pass to represent the module path;
- Cache based on found module path;
- Then load the corresponding module.
Let’s look at how Node.js looks for the module based on the module path string passed in:
/**
 * Resolves a module request to the file name that should be loaded.
 *
 * @param {string} request - Module identifier, as passed to `require()`.
 * @param {Module} parent - Module that issued the request.
 * @param {boolean} isMain - Whether the module is the program entry point.
 * @param {Object} [options] - Optional settings; an `options.paths` array
 *   selects the branch that is elided in this excerpt.
 * @returns {string} The resolved file name, or `request` itself for a
 *   native module.
 * @throws {Error} With `code === 'MODULE_NOT_FOUND'` when nothing matches.
 */
Module._resolveFilename = function(request, parent, isMain, options) {
  if (NativeModule.nonInternalExists(request)) {
    return request;
  }

  var paths;

  if (typeof options === 'object' && options !== null &&
      Array.isArray(options.paths)) {
    // ... (code elided in this excerpt)
  } else {
    // The lookup paths for the module, roughly
    // [parentDir] | [id, [parentDir]]
    paths = Module._resolveLookupPaths(request, parent, true);
  }

  // look up the filename first, since that's the cache key.
  // node index.js
  // request = index.js
  // paths = ['/root/foo/bar/index.js', '/root/foo/bar']
  var filename = Module._findPath(request, paths, isMain);
  if (!filename) {
    var err = new Error(`Cannot find module '${request}'`);
    err.code = 'MODULE_NOT_FOUND';
    throw err;
  }
  return filename;
};
Copy the code
Inside this method we call another internal method, Module._resolveLookupPaths, which collects all possible lookup paths for the module based on the parent module's path:
Module._resolveLookupPaths = function (request, parent, newReturn) { ... }
Copy the code
Internally, this method handles the following cases:
- The module is the entry module, that is, the module started through `node xxx`:
Node.js takes the path your program is executed from and returns it from this method.
- `require(xxx)`, requiring a module that lives in `node_modules`:
All possible node_modules paths along the execution path are traversed.
- `require('./')`, requiring a module by relative or absolute path:
The parent module's path is returned directly.
Once you have the candidate paths, call the Module._findPath method to find the corresponding file path.
/**
 * Searches `paths` for a file that satisfies `request`.
 *
 * @param {string} request - Module identifier, as passed to `require()`.
 * @param {string[]} paths - Candidate directories to search, in order.
 * @param {boolean} isMain - Whether the module is the program entry point.
 * @returns {string|false} The resolved file name, or `false` if none of the
 *   candidate paths yields a file.
 */
Module._findPath = function(request, paths, isMain) {
  if (path.isAbsolute(request)) {
    // An absolute request needs no search directories.
    paths = [''];
  } else if (!paths || paths.length === 0) {
    return false;
  }

  // '\x00' (the NUL character) separates the request from the search paths
  // inside the cache key.
  var cacheKey = request + '\x00' +
                 (paths.length === 1 ? paths[0] : paths.join('\x00'));
  // Path cache
  var entry = Module._pathCache[cacheKey];
  if (entry)
    return entry;

  var exts;
  // Does the request end with a '/'?
  var trailingSlash = request.length > 0 &&
                      request.charCodeAt(request.length - 1) === 47/*/*/;

  // For each path
  for (var i = 0; i < paths.length; i++) {
    // Don't search further if path doesn't exist
    const curPath = paths[i]; // Current path
    if (curPath && stat(curPath) < 1) continue;
    var basePath = path.resolve(curPath, request);
    var filename;

    // stat() reports the kind of entry: 0 is a file, 1 is a directory.
    var rc = stat(basePath);
    // Without a trailing '/', the request could be a file or a folder.
    if (!trailingSlash) {
      if (rc === 0) { // File.
        if (preserveSymlinks && !isMain) {
          filename = path.resolve(basePath);
        } else {
          filename = toRealPath(basePath);
        }
      } else if (rc === 1) { // Directory.
        // For a folder, look for the module entry file named by the "main"
        // field of its package.json.
        if (exts === undefined)
          // e.g. '.js', '.json', '.node', '.mjs'
          exts = Object.keys(Module._extensions);
        filename = tryPackage(basePath, exts, isMain);
      }

      if (!filename) {
        // try it with each of the extensions, i.e. check whether
        // `${basePath}.(js|json|node)` and so on exists
        if (exts === undefined)
          exts = Object.keys(Module._extensions);
        filename = tryExtensions(basePath, exts, isMain);
      }
    }

    // Still nothing found and the path is a folder (this is the only branch
    // taken when the request ends with '/').
    if (!filename && rc === 1) { // Directory.
      if (exts === undefined)
        exts = Object.keys(Module._extensions);
      filename = tryPackage(basePath, exts, isMain) ||
        // try it with each of the extensions at "index"
        tryExtensions(path.resolve(basePath, 'index'), exts, isMain);
    }

    if (filename) {
      // Warn once if '.' resolved outside the module dir
      if (request === '.' && i > 0) {
        if (!warned) {
          warned = true;
          process.emitWarning(
            'warning: require(\'.\') resolved outside the package ' +
            'directory. This functionality is deprecated and will be removed ' +
            'soon.',
            'DeprecationWarning', 'DEP0019');
        }
      }

      // Cache the resolved path.
      Module._pathCache[cacheKey] = filename;
      return filename;
    }
  }
  return false;
};
/**
 * Resolves the entry file of the package located in `requestPath`.
 *
 * @param {string} requestPath - Directory that may contain a package.json.
 * @param {string[]} exts - File extensions to try, e.g. `['.js', '.json']`.
 * @param {boolean} isMain - Whether the module is the program entry point.
 * @returns {string|false} The first candidate accepted by `tryFile` or
 *   `tryExtensions`, or `false` when `readPackage` yields nothing.
 */
function tryPackage(requestPath, exts, isMain) {
  // Get the "main" field of package.json.
  var pkg = readPackage(requestPath);

  if (!pkg) return false;

  // Resolve the "main" value against the package directory.
  var filename = path.resolve(requestPath, pkg);
  // 1. the file itself;
  // 2. the file with each known extension (.js, .json, .node, ...);
  // 3. `${filename}/index` with each known extension.
  return tryFile(filename, isMain) ||
         tryExtensions(filename, exts, isMain) ||
         tryExtensions(path.resolve(filename, 'index'), exts, isMain);
}
Copy the code
Let’s review a strategy for querying modules above:
When the last character of the string passed to require is not "/":
- If it is a file, return the path to the file.
- If it is a folder, look up the path given by the main field of the package.json file in that folder (the corresponding method in the source code is tryPackage):
  - If the path given by the main field is a file and exists, that path is returned.
  - If the path given by the main field has no suffix, try appending the .js, .json, .node and .mjs suffixes to load the corresponding file.
  - If neither of the two conditions above is met, try the index.js, index.json and index.node files under that path.
- If nothing has been found yet, try appending the .js, .json, .node and .mjs suffixes to the path to load the corresponding file (tryExtensions in the source code).
When the last character of the string passed to require is "/", that is, when a folder is required:
- First, look up the path given by the main field of the package.json file in this folder; the procedure is the same as the package.json lookup described above.
- Otherwise, look for index.js, index.json, index.node and similar files in this folder.
When the path to the file is found, we call tryModuleLoad to load the module. Inside this method, we actually call the load method of the module instance:
/**
 * Loads the file at `filename` into this module using the handler
 * registered for its extension.
 *
 * @param {string} filename - Resolved path of the file to load.
 */
Module.prototype.load = function(filename) {
  // ... (code elided in this excerpt)

  this.filename = filename;
  // Define the lookup paths for this module: all possible node_modules
  // paths along the module's directory.
  this.paths = Module._nodeModulePaths(path.dirname(filename));

  // A missing or unregistered extension falls back to the '.js' handler.
  var extension = path.extname(filename) || '.js';
  if (!Module._extensions[extension]) extension = '.js';
  // Start loading the file
  Module._extensions[extension](this, filename);
  this.loaded = true;

  // ... (code elided in this excerpt)
};
The load method calls the matching Module._extensions handler to load files in different formats, for example .js files:
/**
 * Handler for `.js` files: reads the file as UTF-8 text, strips the BOM and
 * hands the source to the module's `_compile` method.
 *
 * @param {Module} module - Module being loaded.
 * @param {string} filename - Path of the file to read.
 */
Module._extensions['.js'] = function(module, filename) {
  // Read the whole file synchronously as text.
  const rawSource = fs.readFileSync(filename, 'utf8');
  const source = internalModule.stripBOM(rawSource);
  module._compile(source, filename);
};
Copy the code
Internally this calls the Module.prototype._compile method:
/**
 * Wraps the module source in the module wrapper function, compiles it with
 * `vm.runInThisContext` and calls the resulting function.
 *
 * @param {string} content - Source text of the module.
 * @param {string} filename - Path of the file the source came from.
 * @returns {*} The value returned by the wrapper function call.
 */
Module.prototype._compile = function(content, filename) {
  content = internalModule.stripShebang(content);

  // create wrapper function
  // Wrap a layer around the source text
  var wrapper = Module.wrap(content);

  // vm.runInThisContext evaluates the wrapper source and yields the
  // wrapper function.
  var compiledWrapper = vm.runInThisContext(wrapper, {
    filename: filename,
    lineOffset: 0,
    displayErrors: true
  });

  var inspectorWrapper = null;
  if (process._breakFirstLine && process._eval == null) {
    if (!resolvedArgv) {
      // we enter the repl if we're not given a filename argument.
      if (process.argv[1]) {
        resolvedArgv = Module._resolveFilename(process.argv[1], null, false);
      } else {
        resolvedArgv = 'repl';
      }
    }

    // Set breakpoint on module start
    if (filename === resolvedArgv) {
      delete process._breakFirstLine;
      inspectorWrapper = process.binding('inspector').callAndPauseOnStart;
    }
  }
  var dirname = path.dirname(filename);
  // construct the require function
  var require = internalModule.makeRequireFunction(this);
  var depth = internalModule.requireDepth;
  // The stat cache is installed by the outermost call (depth 0) and reset
  // again once that call finishes.
  if (depth === 0) stat.cache = new Map();
  var result;
  if (inspectorWrapper) {
    result = inspectorWrapper(compiledWrapper, this.exports, this.exports,
                              require, this, filename, dirname);
  } else {
    // Call the wrapper function with
    // exports / require / module / __filename / __dirname.
    result = compiledWrapper.call(this.exports, this.exports, require, this,
                                  filename, dirname);
  }
  if (depth === 0) stat.cache = null;
  return result;
};
/**
 * Surrounds a module's source text with the two halves of `Module.wrapper`.
 *
 * @param {string} script - Source text of the module.
 * @returns {string} The wrapper prefix, the script and the wrapper suffix
 *   concatenated.
 */
Module.wrap = function(script) {
  const prefix = Module.wrapper[0];
  const wrapped = prefix + script;
  return wrapped + Module.wrapper[1];
};
// The two halves of the function expression that Module.wrap puts around
// every module's source: [prefix, suffix].
Module.wrapper = [
  '(function (exports, require, module, __filename, __dirname) { ',
  '\n});'
];
Copy the code
- Wrap the source code in a function through Module.wrap (following the CommonJS specification)
- Construct a new function by calling the method exposed by the vm module (the V8 virtual machine)
- Complete the function call
The function built from Module.wrapper exposes five arguments to the source text:
- exports
A reference to the exports property of the third argument, module
- require
This require is not the Module.prototype.require method itself; it is built by internalModule.makeRequireFunction. Internally it relies on Module.prototype.require to load modules, and it also extends the require method.
- module
The module object. APIs that need to be exposed for other modules to use must be defined on the module.exports property
- __filename
The absolute path to the current file
- __dirname
The absolute path to the parent folder of the current file
A few questions
exports and module.exports
Pay special attention to the connection between the first argument and the third argument: the first argument is a reference to the exports property of the third argument. Once module.exports is assigned a new object, the link between the module.exports property and the exports variable is broken, so APIs added through exports in the current module are no longer exposed. Requiring a module always returns its module.exports property.
A circular reference
Official example:
a.js
console.log('a beginning');
exports.done = false;
// b.js requires a.js back; at that point it receives the exports of this
// file as populated so far (done === false).
const b = require('./b.js');
console.log('在 a 中,b.done = %j', b.done);
exports.done = true;
console.log('a end');
Copy the code
b.js
console.log('b start');
exports.done = false;
// a.js is still in the middle of loading here, so this require() is served
// from the module cache and a.done is still false.
const a = require('./a.js');
console.log('in b, a.done = %j', a.done);
exports.done = true;
console.log('b end');
Copy the code
main.js
console.log('the main start');
// Loading a.js also loads b.js; the second require() below is answered
// from the module cache.
const a = require('./a.js');
const b = require('./b.js');
console.log('in main, a.done = %j, b.done = %j', a.done, b.done);
Copy the code
$ node main.js
main start
a start
b start
in b, a.done = false
b end
in a, b.done = true
a end
in main, a.done = true, b.done = true
Copy the code
While module A is being loaded it needs to load module B, but module A has already been put into the cache before its own code actually runs. For details, see Module._load:
// The module is stored in the cache first ...
Module._cache[filename] = module;
// ... and only then is its file loaded.
tryModuleLoad(module, filename);
Copy the code
When module A is loaded again during module B loading, the API exported by module A is directly fetched from the cache. At this time, the property of exports.done is still false and not set to true.