1. LUA string definition
The main source analysis of this article is based on Lua5.3.1. Lua internal type analysis has been done before.
LUA source code analysis 1- LUA internal type analysis
In the previous article, LUA’s data type abstraction was analyzed. This article looks at how Lua uses these abstract data types to hold string types.
String operations in Lua are placed in two files:
- lstring.h
- lstring.c
The definition of string types in Lua is placed in lobject.h.
1.1 TString definition
Let’s look at the TString definition:
/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
*/
typedef struct TString {
CommonHeader; // indicates an object that can be garbage collected
lu_byte extra; /* reserved words for short strings; "has hash" for longs */
lu_byte shrlen; /* length for short strings */
unsigned int hash;
union {
size_t lnglen; /* length for long strings */
struct TString *hnext; /* linked list for hash table */
} u;
} TString;
Copy the code
You can see from the comment that TString can represent two types of strings:
- short string
- long string
1.2 Creating TString Objects
As mentioned in the previous article, a factory function is called for GCObject: luaC_newobj (lua_State *L, int tt, size_t sz).
/*
** creates a new string object
*/
static TString *createstrobj (lua_State *L, const char *str, size_t l,
int tag, unsigned int h) {
TString *ts;
GCObject *o;
size_t totalsize; /* total size of TString object */
totalsize = sizelstring(l);
o = luaC_newobj(L, tag, totalsize);
ts = gco2ts(o);
ts->hash = h;
ts->extra = 0;
memcpy(getaddrstr(ts), str, l * sizeof(char));
getaddrstr(ts)[l] = '\ 0'; /* ending 0 */
return ts;
}
Copy the code
Each time you store a LUA string variable, you’re not actually storing string data, but a reference to the string. For example:
a = "str1"
b = "str1"
Copy the code
In fact, A and B are quoting the same data.
There is a big hash table inside Lua that holds all the strings. As shown below:This structure is called LUA virtual machinestringtable
.
The definition is as follows:
typedef struct stringtable {
TString **hash;
int nuse; /* number of elements */
int size;
} stringtable;
Copy the code
This structure is stored in global_State. Let’s look at how the LUA virtual machine is initialized when it is started.
2. Initialize the string table on the VM
When a LUA program starts executing a LUA script, it actually calls a function f_luaopen. In this function luaS_init(L) is called; This function, this function is actually the initialization of the hashtable mentioned above.
/*
** Initialize the string table and the string cache
*/
void luaS_init (lua_State *L) {
global_State *g = G(L);
int i;
luaS_resize(L, MINSTRTABSIZE); /* initial size of string table */
/* pre-create memory-error message */
g->memerrmsg = luaS_newliteral(L, MEMERRMSG);
luaC_fix(L, obj2gco(g->memerrmsg)); /* it should never be collected */
for (i = 0; i < STRCACHE_SIZE; i++) /* fill cache with valid strings */
g->strcache[i][0] = g->memerrmsg;
}
Copy the code
LuaS_resize (); luaS_resize (); luaS_resize (); luaS_resize ();
- Scenario 1: Initial call (what we see in the code now)
- Scenario 2: When the hash bucket is too small and there are too many elements, causing the data search to degenerate into a linked list search; (We’ll look at this call timing later)
Scenario 1: This function essentially initializes the Hash bucket based on MINSTRTABSIZE.
Let’s see how this function works:
/*
** resizes the string table
*/
void luaS_resize (lua_State *L, int newsize) {
int i;
stringtable *tb = &G(L)->strt;
if (newsize > tb->size) { /* grow table if needed */
luaM_reallocvector(L, tb->hash, tb->size, newsize, TString *); // There is also garbage collection involved here, which is not considered in this analysis. For the time being, it is considered as ordinary Realloc
for (i = tb->size; i < newsize; i++) // initialize the hash bucket
tb->hash[i] = NULL;
}
for (i = 0; i < tb->size; i++) { /* rehash the hash bucket */
TString *p = tb->hash[i];
tb->hash[i] = NULL;
while (p) { /* for each node in the list */
TString *hnext = p->u.hnext; /* save next */
unsigned int h = lmod(p->hash, newsize); /* new position */
p->u.hnext = tb->hash[h]; /* chain it */tb->hash[h] = p; p = hnext; }}if (newsize < tb->size) { /* Shrink table if needed, if newsize is smaller than the original size */
/* vanishing slice should be empty */
lua_assert(tb->hash[newsize] == NULL && tb->hash[tb->size - 1] = =NULL);
luaM_reallocvector(L, tb->hash, tb->size, newsize, TString *);
}
tb->size = newsize;
}
Copy the code
3. Create a string
3.1 Definition of strings in Lua
In the Lua virtual machine, the data structure of the string object is encapsulated and abstracted. The general structure of the UTString object looks like this:
/*
** Ensures that address after this type is always fully aligned.
*/
typedef union UTString {
L_Umaxalign dummy; /* ensures maximum alignment for strings */
TString tsv;
} UTString;
Copy the code
A TString is defined as follows:
/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
*/
typedef struct TString {
CommonHeader;
lu_byte extra; /* reserved words for short strings; "has hash" for longs */
lu_byte shrlen; /* length for short strings */
unsigned int hash;
union {
size_t lnglen; /* length for long strings */
struct TString *hnext; /* linked list for hash table */
} u;
} TString;
Copy the code
The contents of the characters are stored after the TString. End with \0.
The general structure is as follows:
3.2 Creating strings in Lua
Creating a string in the Lua virtual machine calls the function luaS_newlstr
/*
** new string (with explicit length)
*/
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
if (l <= LUAI_MAXSHORTLEN) /* short string? * /
return internshrstr(L, str, l);
else {
TString *ts;
if (unlikely(l >= (MAX_SIZE - sizeof(TString))/sizeof(char)))
luaM_toobig(L);
ts = luaS_createlngstrobj(L, l);
memcpy(getstr(ts), str, l * sizeof(char));
returnts; }}Copy the code
If LUAI_MAXSHORTLEN is less than or equal to LUAI_MAXSHORTLEN is short, otherwise long.
For short strings, the function internshrstr is called. Take a look at the implementation:
/*
** checks whether short string exists and reuses it or creates a new one
*/
static TString *internshrstr (lua_State *L, const char *str, size_t l) {
TString *ts;
global_State *g = G(L);
unsigned int h = luaS_hash(str, l, g->seed); // Calculate the hash value of the current STR
TString **list = &g->strt.hash[lmod(h, g->strt.size)]; // Find the corresponding hash bucket entry
for (ts = *list; ts ! =NULL; ts = ts->u.hnext) { // List lookup
if (l == ts->shrlen &&
(memcmp(str, getstr(ts), l * sizeof(char)) = =0)) {
/* found! * /
if (isdead(g, ts)) /* dead (but not collected yet)? * /
changewhite(ts); /* resurrect it */
returnts; }}// Resize is triggered when the linked list is too long
if (g->strt.nuse >= g->strt.size && g->strt.size <= MAX_INT/2) {
luaS_resize(L, g->strt.size * 2);
list = &g->strt.hash[lmod(h, g->strt.size)]; /* recompute with new size */
}
ts = createstrobj(L, str, l, LUA_TSHRSTR, h);
ts->shrlen = cast_byte(l);
ts->u.hnext = *list; // The tail pointer of the newly created TS points to the hash bucket opening
*list = ts; / / will be
g->strt.nuse++;
return ts;
}
Copy the code
If the string is not found in the current Hash bucket, you need to create a new one.
For long strings, LUA handles the same logic as for short strings, except that the two Obj tags are different.
#include <stdio.h>
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>
#include <string.h>
int main(void)
{
char buff[256] = {0};
int error;
lua_State *L = luaL_newstate();
luaopen_base(L);
luaopen_table(L);
luaopen_io(L);
luaopen_string(L);
luaopen_math(L);
const char *luaStr = "print(\"Hello World\")";
error = luaL_loadbuffer(L, luaStr, (size_t)strlen(luaStr), NULL) || lua_pcall(L,0.0.0);
if(error)
{
fprintf(stderr."%s", lua_tostring(L, - 1));
lua_pop(L, 1);
}
// while(fgets(buff, sizeof(buff), stdin) ! = NULL)
/ / {
/ / the error = luaL_loadbuffer (L, buff, (size_t) strlen (buff), NULL) | | lua_pcall (L, 0, 0);
// if(error)
/ / {
// fprintf(stderr, "%s", lua_tostring(L, -1));
// lua_pop(L, 1);
/ /}
// }
lua_close(L);
return 0;
}
Copy the code