git的update-cache命令执行逻辑

git的update-cache命令对应的执行入口main(update-cache.c):

223 int main(int argc, char **argv)
224 {
225         int i, newfd, entries;
226 
227         entries = read_cache();
228         if (entries < 0) {
229                 perror("cache corrupted");
230                 return -1;
231         }
232 
233         newfd = open(".dircache/index.lock", O_RDWR | O_CREAT | O_EXCL, 0600);
234         if (newfd < 0) {
235                 perror("unable to create new cachefile");
236                 return -1;
237         }
238         for (i = 1 ; i < argc; i++) {
239                 char *path = argv[i];
240                 if (!verify_path(path)) {
241                         fprintf(stderr, "Ignoring path %s\n", argv[i]);
242                         continue;
243                 }
244                 if (add_file_to_cache(path)) {
245                         fprintf(stderr, "Unable to add %s to database\n", path);
246                         goto out;
247                 }
248         }
249         if (!write_cache(newfd, active_cache, active_nr) && !rename(".dircache/index.lock", ".dircache/index"))
250                 return 0;
251 out:
252         unlink(".dircache/index.lock");
253 }

cache信息保存在文件.dircache/index里面,每次调用update-cache时,先从该文件装载cache信息,该命令结束后,该文件被更新.使用三个全局变量管理cache信息,这里说的cache信息即对应cache_entry结构.这三个全局变量为(read-cache.c):

 9 struct cache_entry **active_cache = NULL;
10 unsigned int active_nr = 0, active_alloc = 0;

active_cache是个指针数组,其容量是active_alloc,其中有效的元素个数是active_nr,每个指针元素指向一个cache_entry类型的对象.当有效元素个数达到容量时,需要对该数组进行扩展.
Line227调用函数read_cache将暂存区(stage 即cache)的数据装载到内存.该函数的实现为(read-cache.c):

210 int read_cache(void)
211 {
212         int fd, i;
213         struct stat st;
214         unsigned long size, offset;
215         void *map;
216         struct cache_header *hdr;
217 
218         errno = EBUSY;
219         if (active_cache)
220                 return error("more than one cachefile");
221         errno = ENOENT;
222         sha1_file_directory = getenv(DB_ENVIRONMENT);
223         if (!sha1_file_directory)
224                 sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
225         if (access(sha1_file_directory, X_OK) < 0)
226                 return error("no access to SHA1 file directory");
227         fd = open(".dircache/index", O_RDONLY);
228         if (fd < 0)
229                 return (errno == ENOENT) ? 0 : error("open failed");
230 
231         map = (void *)-1;
232         if (!fstat(fd, &st)) {
233                 map = NULL;
234                 size = st.st_size;
235                 errno = EINVAL;
236                 if (size > sizeof(struct cache_header))
237                         map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
238         }
239         close(fd);
240         if (-1 == (int)(long)map)
241                 return error("mmap failed");
242 
243         hdr = map;
244         if (verify_hdr(hdr, size) < 0)
245                 goto unmap;
246 
247         active_nr = hdr->entries;
248         active_alloc = alloc_nr(active_nr);
249         active_cache = calloc(active_alloc, sizeof(struct cache_entry *));
250 
251         offset = sizeof(*hdr);
252         for (i = 0; i < hdr->entries; i++) {
253                 struct cache_entry *ce = map + offset;
254                 offset = offset + ce_size(ce);
255                 active_cache[i] = ce;
256         }
257         return active_nr;
258 
259 unmap:
260         munmap(map, size);
261         errno = EINVAL;
262         return error("verify header failed");
263 }

Line219:220如果已装载了index文件,将视为错误.Line222:226判断对象数据库目录是否可以访问,如果不能访问,将视为错误,因为update-cache命令会把对象存储到该目录下面.Line227:241打开index文件, 获取其文件大小,并通过系统调用mmap将文件内容映射到内存,映射完毕后即可关闭文件.Line244调用函数verify_hdr确认文件index是否有效.该函数的实现为(read-cache.c):

192 static int verify_hdr(struct cache_header *hdr, unsigned long size)
193 {
194         SHA_CTX c;
195         unsigned char sha1[20];
196 
197         if (hdr->signature != CACHE_SIGNATURE)
198                 return error("bad signature");
199         if (hdr->version != 1)
200                 return error("bad version");
201         SHA1_Init(&c);
202         SHA1_Update(&c, hdr, offsetof(struct cache_header, sha1));
203         SHA1_Update(&c, hdr+1, size - sizeof(*hdr));
204         SHA1_Final(sha1, &c);
205         if (memcmp(sha1, hdr->sha1, 20))
206                 return error("bad header sha1");
207         return 0;
208 }

首先检查四字节签名CACHE_SIGNATURE,然后检查版本号,然后校验SHA1.校验SHA1时需要计算cache_header对象中的除去sha1字段的元数据信息,以及index文件存储的cache_entry数据.

回到函数read_cache,Line247:249获得index文件中的cache_entry的个数,并且据此分配指针数组.Line251:257将指针数组元素依次指向相应的cache_entry对象.当第一次在该目录下执行update-cache命令时,index文件尚不存在,Line227的open调用失败且errno为ENOENT,Line229因此返回0,表示cache为空.

回到update-cache命令的入口函数,Line233:237创建一个临时文件.dircache/index.lock.这里的一个细节是系统调用open的flag参数值O_CREAT | O_EXCL的组合,将原子地创建一个文件;如果该文件已经存在,open调用将失败.Line238:248对于命令update-cache的每个参数(是文件路径)调用函数verify_path进行确认后,然后调用函数add_file_to_cache,该函数的实现为(update-cache.c):

124 static int add_file_to_cache(char *path)
125 {
126         int size, namelen;
127         struct cache_entry *ce;
128         struct stat st;
129         int fd;
130 
131         fd = open(path, O_RDONLY);
132         if (fd < 0) {
133                 if (errno == ENOENT)
134                         return remove_file_from_cache(path);
135                 return -1;
136         }
137         if (fstat(fd, &st) < 0) {
138                 close(fd);
139                 return -1;
140         }
141         namelen = strlen(path);
142         size = cache_entry_size(namelen);
143         ce = malloc(size);
144         memset(ce, 0, size);
145         memcpy(ce->name, path, namelen);
146         ce->ctime.sec = st.st_ctime;
147         ce->ctime.nsec = st.st_ctim.tv_nsec;
148         ce->mtime.sec = st.st_mtime;
149         ce->mtime.nsec = st.st_mtim.tv_nsec;
150         ce->st_dev = st.st_dev;
151         ce->st_ino = st.st_ino;
152         ce->st_mode = st.st_mode;
153         ce->st_uid = st.st_uid;
154         ce->st_gid = st.st_gid;
155         ce->st_size = st.st_size;
156         ce->namelen = namelen;
157 
158         if (index_fd(path, namelen, ce, fd, &st) < 0)
159                 return -1;
160 
161         return add_cache_entry(ce);
162 }

Line131:136如果打开文件失败,并且失败原因是文件不存在,这种场景相当于执行git rm命令.这时将调用函数remove_file_from_cache将该cache_entry对象从管理对象active_cache中删除,这种情况下,函数add_file_to_cache将返回,因为文件不存在,不需要后续的逻辑进行写文件对象的处理(对应git add命令).函数remove_file_from_cache的实现为(update-cache.c):

44 static int remove_file_from_cache(char *path)
45 {
46         int pos = cache_name_pos(path, strlen(path));
47         if (pos < 0) {
48                 pos = -pos-1;
49                 active_nr--;
50                 if (pos < active_nr)
51                         memmove(active_cache + pos, active_cache + pos + 1, (active_nr - pos - 1) * sizeof(struct cache_entry *));
52         }
53 }

Line46调用函数cache_name_pos获得包含该文件路径的元素在active_cache 数组中的索引,该函数的实现为(update-cache.c):

23 static int cache_name_pos(const char *name, int namelen)
24 {
25         int first, last;
26 
27         first = 0;
28         last = active_nr;
29         while (last > first) {
30                 int next = (last + first) >> 1;
31                 struct cache_entry *ce = active_cache[next];
32                 int cmp = cache_name_compare(name, namelen, ce->name, ce->namelen);
33                 if (!cmp)
34                         return -next-1;
35                 if (cmp < 0) {
36                         last = next;
37                         continue;
38                 }
39                 first = next+1;
40         }
41         return first;
42 }

注意active_cache数组中的元素是根据文件路径名进行排序的,该函数使用二分查找.如果包含该文件路径的cache_entry对象已经存在于数组之中,则返回负数(-next-1,其中next是该对象在数组中的索引);否则返回非负数,即该文件路径应当插入的位置.
回到函数remove_file_from_cache,Line47:52将存在的cache_entry对象从active_cache数组中删除.
回到函数add_file_to_cache,Line137:156通过系统调用fstat获得文件内部元数据,并赋值给cache_entry对象.Line158:159通过调用函数index_fd写文件对象.该函数的实现为(update-cache.c):

 81 static int index_fd(const char *path, int namelen, struct cache_entry *ce, int fd, struct stat *st)
 82 {
 83         z_stream stream;
 84         int max_out_bytes = namelen + st->st_size + 200;
 85         void *out = malloc(max_out_bytes);
 86         void *metadata = malloc(namelen + 200);
 87         void *in = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 88         SHA_CTX c;
 89 
 90         close(fd);
 91         if (!out || (int)(long)in == -1)
 92                 return -1;
 93 
 94         memset(&stream, 0, sizeof(stream));
 95         deflateInit(&stream, Z_BEST_COMPRESSION);
 96 
 97         /*
 98          * ASCII size + nul byte
 99          */
100         stream.next_in = metadata;
101         stream.avail_in = 1+sprintf(metadata, "blob %lu", (unsigned long) st->st_size);
102         stream.next_out = out;
103         stream.avail_out = max_out_bytes;
104         while (deflate(&stream, 0) == Z_OK)
105                 /* nothing */;
106 
107         /*
108          * File content
109          */
110         stream.next_in = in;
111         stream.avail_in = st->st_size;
112         while (deflate(&stream, Z_FINISH) == Z_OK)
113                 /*nothing */;
114 
115         deflateEnd(&stream);
116 
117         SHA1_Init(&c);
118         SHA1_Update(&c, out, stream.total_out);
119         SHA1_Final(ce->sha1, &c);
120 
121         return write_sha1_buffer(ce->sha1, out, stream.total_out);
122 }

该函数的作用就是把文件压缩,写入对应的文件对象中.Line87:92通过系统调用mmap将文件内容映射到内存中,然后关闭文件.Line94:95初始化deflate压缩算法.Line97:105将文件长度信息压缩写入deflate流中.Line107:115将文件内容压缩写入到deflate流中,然后结束压缩.Line117:119对压缩流计算SHA1.Line121调用函数write_sha1_buffer写文件对象,该函数的实现为(read-cache.c):

173 int write_sha1_buffer(unsigned char *sha1, void *buf, unsigned int size)
174 {
175         char *filename = sha1_file_name(sha1);
176         int i, fd;
177 
178         fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
179         if (fd < 0)
180                 return (errno == EEXIST) ? 0 : -1;
181         write(fd, buf, size);
182         close(fd);
183         return 0;
184 }

Line175调用函数sha1_file_name,根据文件sha1生成文件名.该函数的实现为(read-cache.c):

57 /*
58  * NOTE! This returns a statically allocated buffer, so you have to be
59  * careful about using it. Do a "strdup()" if you need to save the
60  * filename.
61  */
62 char *sha1_file_name(unsigned char *sha1)
63 {
64         int i;
65         static char *name, *base;
66 
67         if (!base) {
68                 char *sha1_file_directory = getenv(DB_ENVIRONMENT) ? : DEFAULT_DB_ENVIRONMENT;
69                 int len = strlen(sha1_file_directory);
70                 base = malloc(len + 60);
71                 memcpy(base, sha1_file_directory, len);
72                 memset(base+len, 0, 60);
73                 base[len] = '/';
74                 base[len+3] = '/';
75                 name = base + len + 1;
76         }
77         for (i = 0; i < 20; i++) {
78                 static char hex[] = "0123456789abcdef";
79                 unsigned int val = sha1[i];
80                 char *pos = name + i*2 + (i > 0);
81                 *pos++ = hex[val >> 4];
82                 *pos = hex[val & 0xf];
83         }
84         return base;
85 }

Line68:73是该文件对象的祖父目录.Line73:75此处base[len+1]和base[len+2]两个字符将是文件对象的父目录名字.Line77:83生成文件对象的父目录和文件对象的名字.其中sha1的20个字节被转换成40个十六进制字符,第1个字节对应的前两个字符是文件对象的父目录名字,其余19个字节对应的后38个字符即为文件对象的名字.
回到函数write_sha1_buffer,Line178:182创建文件对象,写入压缩后的文件内容.
回到函数add_file_to_cache,写入文件对象以后,Line161调用函数add_cache_entry将该cache_entry对象添加到管理数组中.该函数的实现为(update-cache.c):

55 static int add_cache_entry(struct cache_entry *ce)
56 {
57         int pos;
58 
59         pos = cache_name_pos(ce->name, ce->namelen);
60 
61         /* existing match? Just replace it */
62         if (pos < 0) {
63                 active_cache[-pos-1] = ce;
64                 return 0;
65         }
66 
67         /* Make sure the array is big enough .. */
68         if (active_nr == active_alloc) {
69                 active_alloc = alloc_nr(active_alloc);
70                 active_cache = realloc(active_cache, active_alloc * sizeof(struct cache_entry *));
71         }
72 
73         /* Add it in.. */
74         active_nr++;
75         if (active_nr > pos)
76                 memmove(active_cache + pos + 1, active_cache + pos, (active_nr - pos - 1) * sizeof(ce));
77         active_cache[pos] = ce;
78         return 0;
79 }

Line59根据文件路径获得该元素在管理数组中的索引.Line61:65如果元素存在,替换后返回.Line67:71如果元素个数达到容量大小,对数组进行扩容.Line73:77在管理数组中记录该元素.
回到update-cache命令的入口函数main,Line249调用write_cache将内存对象的管理数组active_cache写入临时文件,然后将临时文件重命名为index,即完成index文件的更新.函数write_cache的实现为(update-cache.c):

164 static int write_cache(int newfd, struct cache_entry **cache, int entries)
165 {
166         SHA_CTX c;
167         struct cache_header hdr;
168         int i;
169 
170         hdr.signature = CACHE_SIGNATURE;
171         hdr.version = 1;
172         hdr.entries = entries;
173 
174         SHA1_Init(&c);
175         SHA1_Update(&c, &hdr, offsetof(struct cache_header, sha1));
176         for (i = 0; i < entries; i++) {
177                 struct cache_entry *ce = cache[i];
178                 int size = ce_size(ce);
179                 SHA1_Update(&c, ce, size);
180         }
181         SHA1_Final(hdr.sha1, &c);
182 
183         if (write(newfd, &hdr, sizeof(hdr)) != sizeof(hdr))
184                 return -1;
185 
186         for (i = 0; i < entries; i++) {
187                 struct cache_entry *ce = cache[i];
188                 int size = ce_size(ce);
189                 if (write(newfd, ce, size) != size)
190                         return -1;
191         }
192         return 0;
193 }

Line170:181初始化index文件元数据,即cache_header对象内容.Line183:184写入元数据.Line186:191写入所有的cache_entry对象内容.

猜你喜欢

转载自blog.csdn.net/azurelaker/article/details/81675175