Implementing your own database, part 2

I. Introduction

Last time, the database supported inserting into and querying a test table, but all the data was kept only in memory. If the program is restarted, all the data is lost, so it needs to be persisted to disk. As SQLite does, we simply save the database's data to a single disk file.

II. Implementation principles

93cc339481f96dce3f5791f6c25b891d.png
image.png

Last time we stored the database's rows in 4 KB in-memory pages, so we can easily persist those pages to a file. The specific implementation steps are as follows:

  1. We define an abstract structure, Pager, which wraps both the open file and the in-memory pages. The details are as follows:

// Pager ties the open database file to the in-memory copies of its pages.
typedef struct {
  // Descriptor of the open database file.
  int file_descriptor;
  // Length of the file in bytes.
  uint32_t file_length;
  // One slot per page, TABLE_MAX_PAGES slots in total.
 void* pages[TABLE_MAX_PAGES];
} Pager;

// A table is a row count plus the pager that holds its pages.
typedef struct {
      // Number of rows currently in the table.
      uint32_t num_rows;
      // Pager through which the table's pages are reached.
      Pager *pager;
 } Table;
  2. When operating on data, obtain the corresponding page through the pager interface. If the page is already in memory, use it directly. If it is not, load the 4 KB of data at the corresponding location in the file and cache it in the corresponding page slot.

  3. When the database is closed, the pages held in memory are written back to disk.

  4. The key to the implementation is using lseek to position within the file; the data stored in the file is located by row number, just as it is in memory.

  5. When closing the database, after seeking to the right position in the file, each page is written to disk directly as a memory block, which is very simple.

III. Implementation code

First, the way a Table is created has changed: instead of living entirely in memory, it is now associated with an open file in addition to the memory it needs.

253 Pager *pager_open(const char *filename)
254 {
              // Open the file for reading and writing; create it if it does not exist
255         int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
256         if (fd == -1) {
257                 printf("Unable to open file.\n");
258                 exit(EXIT_FAILURE);
259         }
               // Seek to the end; the returned offset is the size of the file
260         int  file_length = lseek(fd, 0, SEEK_END);
              // Allocate the Pager and associate it with the file
261         Pager *pager = (Pager*)malloc(sizeof(Pager));
262         pager->file_descriptor = fd;
263         pager->file_length = file_length;
264 
              // No page is loaded yet; every slot starts out empty
265         for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
266                 pager->pages[i] = NULL;
267         }
268         return pager;
269 }


              // Open the database file and build the Table that wraps its pager
304 Table *db_open(const char *filename)
305 {
306         Pager *pager      = pager_open(filename);
              // NOTE(review): db_close flushes every full page as PAGE_SIZE bytes, so
              // file_length / ROW_SIZE may not equal the stored row count once the file
              // contains page padding - verify against the flush logic
307         uint32_t num_rows = pager->file_length / ROW_SIZE;
308         Table *table      = (Table *)malloc(sizeof(Table));
309         table->pager = pager;
310         table->num_rows   = num_rows;
311         return table;
312 }

When the database is closed, the data is flushed to disk, as follows:

332 void db_close(Table *table)
333 {
334         Pager *pager = table->pager;
              // Compute the number of full pages from the row count
335         uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;
336         for (uint32_t i = 0; i < num_full_pages; i++) {
337                 if (pager->pages[i] == NULL) {
                              // NOTE(review): break ends the loop at the first page that was never
                              // loaded, so any later full page that is loaded is not flushed here
338                         break;
339                 }
340                 pager_flush(pager, i, PAGE_SIZE);
341         }
              // Handle the leftover rows that do not fill a whole page
342         uint32_t num_add_rows = table->num_rows % ROWS_PER_PAGE;
343         if (num_add_rows > 0) {
344                 uint32_t page_num = num_full_pages;
345                 if (pager->pages[page_num] != NULL) {
346                         pager_flush(pager, page_num, num_add_rows * ROW_SIZE);
347                         free(pager->pages[page_num]);
348                         pager->pages[page_num] = NULL;
349                 }
350         }
351         int result = close(pager->file_descriptor);
352         if (result == -1) {
353                 printf("Error closing db file.\n");
354                 exit(EXIT_FAILURE);
355         }
              // Release every page buffer that is still allocated, then the pager and table
356         for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
357                 Pager *page = (Pager *) pager->pages[i];
358                 if (page) {
359                         free(page);
360                         pager->pages[i] = NULL;
361                 }
362         }
363         free(pager);
364         free(table);
365 }

The following is the core code, which flushes a page to disk. The code is as follows:

314 void pager_flush(Pager *pager, uint32_t page_num, uint32_t size)
315 {
316         if (pager->pages[page_num] == NULL) {
317                 printf("Tried to flush null page.\n");
318                 exit(EXIT_FAILURE);
319         }
               // Seek to the page's position in the file, derived from the page number
320         off_t offset = lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
321         if (offset == -1) {
322                 printf("Error seeking:%d\n", errno);
323                 exit(EXIT_FAILURE);
324         }
               // Write the data to the file; usually a whole page at a time, but possibly only part of one
325         ssize_t byte_written = write(pager->file_descriptor, pager->pages[page_num], size);
326         if (byte_written == -1) {
327                 printf("Error writing:%d", errno);
328                 exit(EXIT_FAILURE);
329         }
330 }

Writing to disk is straightforward. Reading works just as it did when everything was in memory, except that the pages are now loaded from the file:

282 ExecuteResult execute_select(Statement *statement, Table *table)
283 {
284         Row row;
285         for (uint32_t i = 0; i < table->num_rows; i++) {
                       // Deserialize the row and print it
286                 deserialize_row(row_slot(table, i), &row);
287                 print_row(&row);
288         }
289         return EXECUTE_SUCCESS;
290 }

Get memory pages and page offsets

244 void *row_slot(Table *table, uint32_t row_num)
245 {
              // Work out which page holds the row
246         uint32_t page_num = row_num / ROWS_PER_PAGE;
               // Fetch that page
247         void *page = get_page(table->pager, page_num);
248         uint32_t row_offset  = row_num % ROWS_PER_PAGE;
               // Work out the row's byte offset within the page
249         uint32_t byte_offset = row_offset * ROW_SIZE;
250         return (char *)page + byte_offset;
251 }

The following reads a specific page from the file:

100 void *get_page(Pager *pager, uint32_t page_num)
101 {
              // NOTE(review): pages[] has TABLE_MAX_PAGES slots, and this check still lets
              // page_num == TABLE_MAX_PAGES through
102         if (page_num > TABLE_MAX_PAGES) {
103                 printf("Tried to fetch page number out of bounds. %d>%d", page_num, TABLE_MAX_PAGES);
104                 exit(EXIT_FAILURE);
105         }
              // Cache miss: allocate a buffer for the page and fill it from the file if possible
106         if (pager->pages[page_num] == NULL) {
107                 void  *page = malloc(PAGE_SIZE);
108                 uint32_t num_pages = pager->file_length / PAGE_SIZE;
109                 if (pager->file_length % PAGE_SIZE) {
110                         num_pages += 1;
111                 }
                      // Read one page's worth of data from the file; skipped if the file was originally empty
112                 if (page_num <= num_pages && pager->file_length != 0) {
113                         lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
114                         ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
115                         if (bytes_read == -1) {
116                                 printf("Error reading file:%d\n", errno);
117                                 exit(EXIT_FAILURE);
118                         }
119                 }
120                 pager->pages[page_num] = page;
121         }
122        return pager->pages[page_num];
123 }

IV. Final complete code

#include <stdio.h>
#if defined(_MSC_VER)
        #include <BaseTsd.h>
        typedef SSIZE_T ssize_t;
#endif

#include <stdint.h>
#include <string.h>
#include <malloc.h>
#include <cstdlib>
#include <stdbool.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
// #include <file.h>



/* NOTE(review): the C library headers normally provide EXIT_SUCCESS already; this restates it as 0. */
#define EXIT_SUCCESS 0
/* Size of the stack buffer in getline_my(); an input line holds at most MAX_LEN - 1 characters. */
#define MAX_LEN 1024
/* Compiler-specific pragma; presumably silences MSVC's source-encoding warning C4819 - confirm. */
#pragma warning(disable : 4819)

/* Maximum lengths of the two text columns, not counting the terminating NUL. */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
/* Number of slots in Pager.pages[], i.e. the most pages a table can hold. */
#define TABLE_MAX_PAGES 100

/* Outcome of do_meta_command() for input lines that start with '.'. */
typedef enum {
        META_COMMAND_SUCCESS,
        META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;

/* Kind of statement recognised by prepare_statement(). */
typedef enum { STATEMENT_INSERT, STATEMENT_SELECT } StatementType;
/* Result of parsing an input line into a Statement. */
typedef enum { PREPARE_SUCCESS, PREPARE_UNRECOGNIZED_STATEMENT, PREPARE_NEGATIVE_ID, PREPARE_STRING_TOO_LONG, PREPARE_SYNTAX_ERROR} PrepareResult;
/* Result of executing a Statement against the table. */
typedef enum { EXECUTE_SUCCESS, EXECUTE_TABLE_FULL } ExecuteResult;



/* One table row as handled in memory; serialize_row() defines how it is packed into a page. */
typedef struct {
        uint32_t id;
        /* The +1 leaves room for the terminating NUL of a maximum-length string. */
        char username[COLUMN_USERNAME_SIZE + 1];
        char email[COLUMN_EMAIL_SIZE + 1];
} Row;

/* Ties the open database file to the in-memory copies of its pages. */
typedef struct {
        /* Descriptor returned by open() in pager_open(). */
        int file_descriptor;
        /* File size in bytes as measured by pager_open(). */
        uint32_t file_length;
        /* Page cache: NULL until get_page() loads the page with that index. */
        void *pages[TABLE_MAX_PAGES];
} Pager;

/* The single table of the database. */
typedef struct {
        /* Number of rows stored; also the index at which execute_insert() places the next row. */
        uint32_t num_rows;
        /* Pager through which all row storage is reached. */
        Pager *pager;
} Table;



/* A parsed statement, ready for execute_statement(). */
typedef struct {
        StatementType type;
        /* Filled in by prepare_insert(); not set by the select path of prepare_statement(). */
        Row row_to_insert;
} Statement;



/* Holds the most recently read line of user input. */
typedef struct {
        /* Heap-allocated, NUL-terminated line (without the newline); NULL before the first read. */
        char *buffer;
        /* Set by getline_my() to the number of characters stored in buffer. */
        size_t buffer_length;
        /* Set by read_input() to the value getline_my() returned. */
        ssize_t input_length;
} InputBuffer;


/* Size in bytes of one member of a struct type, computed without needing an instance. */
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)

/* Serialized row layout: id, then username, then email, packed back to back with no padding. */
const uint32_t ID_SIZE = size_of_attribute(Row, id);
const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
const uint32_t ID_OFFSET = 0;

const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
/* 4 + 33 + 256 = 293 bytes per serialized row. */
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;


/* Page geometry: whole rows only, so the bytes left over at the end of each page stay unused. */
const uint32_t PAGE_SIZE = 4096;
/* 4096 / 293 = 13 rows per page. */
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;



/* Forward declaration: do_meta_command() calls db_close() before its definition appears. */
void db_close(Table *table);

InputBuffer *new_input_buffer()
{
        InputBuffer *input_buffer = (InputBuffer *)malloc(sizeof(InputBuffer));
        input_buffer->buffer = NULL;
        input_buffer->buffer_length = 0;
        input_buffer->input_length = 0;
        return input_buffer;
}

void *get_page(Pager *pager, uint32_t page_num)
{
        if (page_num > TABLE_MAX_PAGES) {
                printf("Tried to fetch page number out of bounds. %d>%d", page_num, TABLE_MAX_PAGES);
                exit(EXIT_FAILURE);
        }
        if (pager->pages[page_num] == NULL) {
                void  *page = malloc(PAGE_SIZE);
                uint32_t num_pages = pager->file_length / PAGE_SIZE;
                if (pager->file_length % PAGE_SIZE) {
                        num_pages += 1;
                }
                if (page_num <= num_pages && pager->file_length != 0) {
                        lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
                        ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
                        if (bytes_read == -1) {
                                printf("Error reading file:%d\n", errno);
                                exit(EXIT_FAILURE);
                        }
                }
                pager->pages[page_num] = page;
        }
       return pager->pages[page_num];
}

#ifndef MAX_LEN
/* Fallback so this helper stays self-contained; the file-level definition takes precedence. */
#define MAX_LEN 1024
#endif

/*
 * Read one line from `fd` into a freshly allocated, NUL-terminated string.
 *
 * Reading stops at a newline (consumed, not stored), at end of file, or once
 * MAX_LEN - 1 characters have been stored; in the last case the rest of the
 * line stays in the stream for the next call.
 *
 * buffer  receives the heap-allocated line; the caller frees it.
 * length  receives the number of characters stored (excluding the NUL).
 * Returns the number of characters stored, which is 0 for both an empty line
 * and end of file, or -1 (with *buffer set to NULL) if allocation fails.
 */
int  getline_my(char **buffer, size_t *length, FILE *fd)
{
        char line[MAX_LEN] = {0};
        int count = 0;

        while (count < MAX_LEN - 1) {
                /* fgetc() returns int so that EOF is distinguishable from every byte value. */
                int ch = fgetc(fd);
                if (ch == EOF || ch == '\n') {
                        break;
                }
                line[count++] = (char)ch;
        }
        line[count] = '\0';

        char *copy = (char *)malloc((size_t)count + 1);
        if (copy == NULL) {
                *buffer = NULL;
                *length = 0;
                return -1;
        }
        memcpy(copy, line, (size_t)count + 1);

        *buffer = copy;
        *length = (size_t)count;
        return count;
}

void print_row(Row *row)
{
        printf("(%d,%s,%s)\n", row->id, row->username, row->email);
}

void read_input(InputBuffer *input_buffer)
{
        ssize_t bytes_read =
            getline_my(&(input_buffer->buffer), &(input_buffer->buffer_length), stdin);

        if (bytes_read <= 0) {
                printf("Error reading input\n");
                exit(EXIT_FAILURE);
        }

        input_buffer->input_length = bytes_read ;
        input_buffer->buffer[bytes_read] = 0;
}

void close_input_buffer(InputBuffer *input_buffer)
{
        free(input_buffer->buffer);
        free(input_buffer);
        input_buffer = NULL;
}


///

MetaCommandResult do_meta_command(InputBuffer *input_buffer, Table *table)
{
        if (strcmp(input_buffer->buffer, ".exit") == 0) {
                close_input_buffer(input_buffer);
                db_close(table);
                exit(EXIT_SUCCESS);
        } else {
                return META_COMMAND_UNRECOGNIZED_COMMAND;
        }
}


PrepareResult prepare_insert(InputBuffer *input_buffer, Statement *statement)
{
        statement->type = STATEMENT_INSERT;

        char *keyword = strtok(input_buffer->buffer, " ");
        char *id_string = strtok(NULL, " ");
        char *username = strtok(NULL, " ");
        char *email = strtok(NULL, " ");

        if (id_string == NULL || username == NULL || email == NULL) {
                return PREPARE_SYNTAX_ERROR;
        }

        int id = atoi(id_string);
        if (id < 0) {
                return PREPARE_NEGATIVE_ID;
        }
        if (strlen(username) > COLUMN_USERNAME_SIZE) {
                return PREPARE_STRING_TOO_LONG;
        }
        if (strlen(email) > COLUMN_EMAIL_SIZE) {
                return PREPARE_STRING_TOO_LONG;
        }

        statement->row_to_insert.id = id;
        strcpy(statement->row_to_insert.username, username);
        strcpy(statement->row_to_insert.email, email);

        return PREPARE_SUCCESS;
}

PrepareResult prepare_statement(InputBuffer *input_buffer,
                                Statement *statement)
{
        if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
                return prepare_insert(input_buffer, statement);
        } else if (strncmp(input_buffer->buffer, "select", 6) == 0) {
                statement->type = STATEMENT_SELECT;
                return PREPARE_SUCCESS;
        }

        return PREPARE_UNRECOGNIZED_STATEMENT;
}


void serialize_row(Row *source, void *destination)
{
        memcpy((char *)destination + ID_OFFSET, &(source->id), ID_SIZE);
        memcpy((char *)destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
        memcpy((char *)destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
}

void deserialize_row(void *source, Row *destination)
{
        memcpy(&(destination->id), (char *)source + ID_OFFSET, ID_SIZE);
        memcpy(&(destination->username), (char *)source + USERNAME_OFFSET, USERNAME_SIZE);
        memcpy(&(destination->email), (char *)source + EMAIL_OFFSET, EMAIL_SIZE);
}

void *row_slot(Table *table, uint32_t row_num)
{
        uint32_t page_num = row_num / ROWS_PER_PAGE;
        void *page = get_page(table->pager, page_num);
        uint32_t row_offset  = row_num % ROWS_PER_PAGE;
        uint32_t byte_offset = row_offset * ROW_SIZE;
        return (char *)page + byte_offset;
}

Pager *pager_open(const char *filename)
{
        int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
        if (fd == -1) {
                printf("Unable to open file.\n");
                exit(EXIT_FAILURE);
        }
        int  file_length = lseek(fd, 0, SEEK_END);
        Pager *pager = (Pager*)malloc(sizeof(Pager));
        pager->file_descriptor = fd;
        pager->file_length = file_length;

        for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
                pager->pages[i] = NULL;
        }
        return pager;
}

ExecuteResult execute_insert(Statement *statement, Table *table)
{
        if (table->num_rows >= TABLE_MAX_ROWS) {
                return EXECUTE_TABLE_FULL;
        }
        Row *row_to_insert = &(statement->row_to_insert);
        serialize_row(row_to_insert, row_slot(table, table->num_rows));
        table->num_rows += 1;
        return EXECUTE_SUCCESS;
}

ExecuteResult execute_select(Statement *statement, Table *table)
{
        Row row;
        for (uint32_t i = 0; i < table->num_rows; i++) {
                deserialize_row(row_slot(table, i), &row);
                print_row(&row);
        }
        return EXECUTE_SUCCESS;
}

ExecuteResult execute_statement(Statement *statement, Table *table)
{
        switch (statement->type) {
                case (STATEMENT_INSERT):
                        return execute_insert(statement, table);
                case (STATEMENT_SELECT):
                        return  execute_select(statement, table);
        }
}



Table *db_open(const char *filename)
{
        Pager *pager      = pager_open(filename);
        uint32_t num_rows = pager->file_length / ROW_SIZE;
        Table *table      = (Table *)malloc(sizeof(Table));
        table->pager = pager;
        table->num_rows   = num_rows;
        return table;
}

void pager_flush(Pager *pager, uint32_t page_num, uint32_t size)
{
        if (pager->pages[page_num] == NULL) {
                printf("Tried to flush null page.\n");
                exit(EXIT_FAILURE);
        }
        off_t offset = lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
        if (offset == -1) {
                printf("Error seeking:%d\n", errno);
                exit(EXIT_FAILURE);
        }
        ssize_t byte_written = write(pager->file_descriptor, pager->pages[page_num], size);
        if (byte_written == -1) {
                printf("Error writing:%d", errno);
                exit(EXIT_FAILURE);
        }
}

void db_close(Table *table)
{
        Pager *pager = table->pager;
        uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;
        for (uint32_t i = 0; i < num_full_pages; i++) {
                if (pager->pages[i] == NULL) {
                        break;
                }
                pager_flush(pager, i, PAGE_SIZE);
        }
        uint32_t num_add_rows = table->num_rows % ROWS_PER_PAGE;
        if (num_add_rows > 0) {
                uint32_t page_num = num_full_pages;
                if (pager->pages[page_num] != NULL) {
                        pager_flush(pager, page_num, num_add_rows * ROW_SIZE);
                        free(pager->pages[page_num]);
                        pager->pages[page_num] = NULL;
                }
        }
        int result = close(pager->file_descriptor);
        if (result == -1) {
                printf("Error closing db file.\n");
                exit(EXIT_FAILURE);
        }
        for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
                Pager *page = (Pager *) pager->pages[i];
                if (page) {
                        free(page);
                        pager->pages[i] = NULL;
                }
        }
        free(pager);
        free(table);
}

/* Write the interactive prompt to stdout, without a trailing newline. */
void print_prompt()
{
        fputs("microdb > ", stdout);
}

int main(int argc, char **argv)
{
        InputBuffer *input_buffer = new_input_buffer();
        if (argc < 2) {
                printf("Must supply a database filename.\n");
                exit(EXIT_FAILURE);
        }
        char *filename = argv[1];
        Table *table = db_open(filename);
        while (true) {
                print_prompt();
                read_input(input_buffer);
                if (input_buffer->buffer[0] == '.') {
                        switch (do_meta_command(input_buffer,table)) {
                                case (META_COMMAND_SUCCESS):
                                        continue;
                                case (META_COMMAND_UNRECOGNIZED_COMMAND):
                                        printf("Unrecognized command '%s'\n", input_buffer->buffer);
                                        continue;
                        }
                } else {
                        Statement statement;
                        switch (prepare_statement(input_buffer, &statement)) {
                                case (PREPARE_SUCCESS):
                                        break;
                                case (PREPARE_STRING_TOO_LONG):
                                        printf("String is too long.\n");
                                        continue;
                                case (PREPARE_NEGATIVE_ID):
                                        printf("ID must be positive.\n");
                                        continue;
                                case (PREPARE_SYNTAX_ERROR):
                                        printf("Syntax error. Could not parse statement.\n");
                                        continue;
                                case (PREPARE_UNRECOGNIZED_STATEMENT):
                                        printf("Unrecognized keyword at start of '%s'.\n",
                                               input_buffer->buffer);
                                        continue;
                        }
                        switch (execute_statement(&statement, table)) {
                                case EXECUTE_SUCCESS:
                                        printf("Executed.\n");
                                        break;
                                case EXECUTE_TABLE_FULL:
                                        printf("Error: Table full.\n");
                                        break;
                        }
                }
        }
        return 0;
}

That is the complete code. It was compiled under Linux; compiling it under Windows would require changing many API calls, but under Linux it is simple: `g++ -g ./main.cpp` is all that is needed.

Run as follows:

[root@localhost microdb]# ./a.out  db.mb 
microdb > select
(1,a,[email protected])
(2,b,[email protected])
(1,a,[email protected])
(4,rrr,[email protected])
(5,ttt,[email protected])
Executed.
microdb > insert 6 d [email protected]
Executed.
microdb > insert 7 f [email protected]
Executed.
microdb > select
(1,a,[email protected])
(2,b,[email protected])
(1,a,[email protected])
(4,rrr,[email protected])
(5,ttt,[email protected])
(6,d,[email protected])
(7,f,[email protected])
Executed.
microdb > insert 8 g [email protected]
Executed.
microdb > select
(1,a,[email protected])
(2,b,[email protected])
(1,a,[email protected])
(4,rrr,[email protected])
(5,ttt,[email protected])
(6,d,[email protected])
(7,f,[email protected])
(8,g,[email protected])
Executed.
microdb > .exit
[root@localhost microdb]# ./a.out  db.mb 
microdb > select
(1,a,[email protected])
(2,b,[email protected])
(1,a,[email protected])
(4,rrr,[email protected])
(5,ttt,[email protected])
(6,d,[email protected])
(7,f,[email protected])
(8,g,[email protected])
Executed.
microdb > .exit

V. Viewing the contents of the database file

You can view the contents of db.mb with ImHex, as shown below. The id is 1; note that it is stored in the lowest of its four bytes, i.e. in little-endian byte order. If the file may be written or read on a big-endian machine, byte order has to be taken into account. After the id, `a` is the username, and the next field, email, is [email protected]. The file as a whole wastes quite a lot of space, because every field is stored at its full fixed width.

7b47405b620b09c4549e58aacbe24d7c.png

Guess you like

Origin blog.csdn.net/mseaspring/article/details/128768727