git的update-cache
命令对应的执行入口main
(update-cache.c):
/*
 * Entry point of update-cache: loads the cache via read_cache(), creates
 * ".dircache/index.lock", adds every path argument accepted by
 * verify_path() through add_file_to_cache(), then writes the in-memory
 * cache to the lock file and renames it over ".dircache/index".
 *
 * Returns 0 once the new index has been written and renamed, and -1 when
 * read_cache() reports a negative count or the lock file cannot be created.
 * NOTE(review): the "out" path unlinks the lock file and then falls off the
 * end of main without a return statement, so no explicit failure status is
 * produced there; newfd is never closed explicitly.
 */
223 int main(int argc, char **argv)
224 {
225 int i, newfd, entries;
226
227 entries = read_cache();
228 if (entries < 0) {
229 perror("cache corrupted");
230 return -1;
231 }
232
/* O_CREAT | O_EXCL makes open() fail if the lock file already exists. */
233 newfd = open(".dircache/index.lock", O_RDWR | O_CREAT | O_EXCL, 0600);
234 if (newfd < 0) {
235 perror("unable to create new cachefile");
236 return -1;
237 }
238 for (i = 1 ; i < argc; i++) {
239 char *path = argv[i];
240 if (!verify_path(path)) {
241 fprintf(stderr, "Ignoring path %s\n", argv[i]);
242 continue;
243 }
244 if (add_file_to_cache(path)) {
245 fprintf(stderr, "Unable to add %s to database\n", path);
246 goto out;
247 }
248 }
/* Only a successful write followed by a successful rename counts as success. */
249 if (!write_cache(newfd, active_cache, active_nr) && !rename(".dircache/index.lock", ".dircache/index"))
250 return 0;
251 out:
252 unlink(".dircache/index.lock");
253 }
cache信息保存在文件.dircache/index
里面,每次调用update-cache时,先从该文件装载cache信息,该命令结束后,该文件被更新.使用三个全局变量管理cache信息,这里说的cache信息即对应cache_entry
结构.这三个全局变量为(read-cache.c):
/* Array of pointers to the cache entries currently held in memory. */
9 struct cache_entry **active_cache = NULL;
/* active_nr: number of entries in use; active_alloc: allocated slots in active_cache. */
10 unsigned int active_nr = 0, active_alloc = 0;
active_cache
是个指针数组,其容量是active_alloc
,其中有效的元素个数是active_nr
, 每个指针元素指向类型cache_entry
对象.当有效元素个数达到容量时,需要对该数组进行扩展.
Line227调用函数read_cache
将暂存区(stage
即cache)的数据装载到内存.该函数的实现为(read-cache.c):
/*
 * Load ".dircache/index" into the global active_cache array.
 *
 * The file is mapped read-only with mmap() and every active_cache[i] is
 * pointed directly into that mapping, so the mapping is left in place on
 * success.
 *
 * Returns the number of entries on success, 0 when the index file does not
 * exist, and the result of error() otherwise (presumably negative, since
 * main treats a value < 0 as failure). errno is set ahead of each check so
 * the caller's perror() reports a matching reason.
 * NOTE(review): when fstat() succeeds but the file is not larger than the
 * header, map stays NULL; that is not caught by the -1 check below, so
 * verify_hdr() is then called with a NULL header. The calloc() result is
 * not checked either.
 */
210 int read_cache(void)
211 {
212 int fd, i;
213 struct stat st;
214 unsigned long size, offset;
215 void *map;
216 struct cache_header *hdr;
217
218 errno = EBUSY;
219 if (active_cache)
220 return error("more than one cachefile");
221 errno = ENOENT;
222 sha1_file_directory = getenv(DB_ENVIRONMENT);
223 if (!sha1_file_directory)
224 sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
225 if (access(sha1_file_directory, X_OK) < 0)
226 return error("no access to SHA1 file directory");
227 fd = open(".dircache/index", O_RDONLY);
228 if (fd < 0)
229 return (errno == ENOENT) ? 0 : error("open failed");
230
/* Start from the failure value so that an fstat() failure is reported below. */
231 map = (void *)-1;
232 if (!fstat(fd, &st)) {
233 map = NULL;
234 size = st.st_size;
235 errno = EINVAL;
236 if (size > sizeof(struct cache_header))
237 map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
238 }
239 close(fd);
240 if (-1 == (int)(long)map)
241 return error("mmap failed");
242
243 hdr = map;
244 if (verify_hdr(hdr, size) < 0)
245 goto unmap;
246
247 active_nr = hdr->entries;
248 active_alloc = alloc_nr(active_nr);
249 active_cache = calloc(active_alloc, sizeof(struct cache_entry *));
250
/* Entries are variable-length and stored back to back right after the header. */
251 offset = sizeof(*hdr);
252 for (i = 0; i < hdr->entries; i++) {
253 struct cache_entry *ce = map + offset;
254 offset = offset + ce_size(ce);
255 active_cache[i] = ce;
256 }
257 return active_nr;
258
259 unmap:
260 munmap(map, size);
261 errno = EINVAL;
262 return error("verify header failed");
263 }
Line219:220如果已装载了index文件,将视为错误.Line222:226判断对象数据库目录是否可以访问,如果不能访问,将视为错误,因为update-cache命令会把对象存储到该目录下面.Line227:241打开index文件, 获取其文件大小,并通过系统调用mmap
将文件内容映射到内存,映射完毕后即可关闭文件.Line244调用函数verify_hdr
确认文件index是否有效.该函数的实现为(read-cache.c):
/*
 * Validate a mapped index file of size bytes starting at hdr.
 *
 * Checks the signature and version fields, then recomputes the SHA1 over
 * the header bytes that precede the sha1 field plus everything that follows
 * the header (size - sizeof(*hdr) bytes) and compares it with hdr->sha1.
 *
 * Returns 0 when all checks pass, otherwise the result of error().
 */
192 static int verify_hdr(struct cache_header *hdr, unsigned long size)
193 {
194 SHA_CTX c;
195 unsigned char sha1[20];
196
197 if (hdr->signature != CACHE_SIGNATURE)
198 return error("bad signature");
199 if (hdr->version != 1)
200 return error("bad version");
201 SHA1_Init(&c);
202 SHA1_Update(&c, hdr, offsetof(struct cache_header, sha1));
203 SHA1_Update(&c, hdr+1, size - sizeof(*hdr));
204 SHA1_Final(sha1, &c);
205 if (memcmp(sha1, hdr->sha1, 20))
206 return error("bad header sha1");
207 return 0;
208 }
首先检查四字节签名CACHE_SIGNATURE
,然后检查版本号,然后校验SHA1.校验SHA1时需要计算cache_header
对象中的除去sha1字段的元数据信息,以及index文件存储的cache_entry
数据.
回到函数read_cache
,Line247:249获得index文件中的cache_entry
的个数,并且据此分配指针数组.Line251:257将指针数组元素依次指向相应的cache_entry
对象.当第一次在该目录下执行update-cache
命令时,index文件尚不存在,Line227打开文件失败且errno为ENOENT,Line229据此返回0(表示cache为空).
回到update-cache命令的入口函数,Line233:237创建一个临时文件.dircache/index.lock.这里的一个细节是系统调用open
的flag
参数值O_CREAT | O_EXCL
的组合,将原子地创建一个文件(若该文件已存在则open失败).Line238:248对于命令update-cache的每个参数(即文件路径)调用函数verify_path
进行确认后,然后调用函数add_file_to_cache
,该函数的实现为(update-cache.c):
/*
 * Add (or refresh) the cache entry for path.
 *
 * If the file cannot be opened because it does not exist, the path is
 * removed from the cache instead. Otherwise a zeroed cache_entry is
 * allocated and filled from fstat(), the file content is stored through
 * index_fd() (which closes fd and fills ce->sha1), and the entry is handed
 * to add_cache_entry().
 *
 * Returns -1 on open/fstat/index_fd failure; otherwise the result of
 * remove_file_from_cache() or add_cache_entry().
 * NOTE(review): the malloc() result is not checked, and ce is not freed
 * when index_fd() fails.
 */
124 static int add_file_to_cache(char *path)
125 {
126 int size, namelen;
127 struct cache_entry *ce;
128 struct stat st;
129 int fd;
130
131 fd = open(path, O_RDONLY);
132 if (fd < 0) {
133 if (errno == ENOENT)
134 return remove_file_from_cache(path);
135 return -1;
136 }
137 if (fstat(fd, &st) < 0) {
138 close(fd);
139 return -1;
140 }
141 namelen = strlen(path);
142 size = cache_entry_size(namelen);
143 ce = malloc(size);
144 memset(ce, 0, size);
145 memcpy(ce->name, path, namelen);
146 ce->ctime.sec = st.st_ctime;
147 ce->ctime.nsec = st.st_ctim.tv_nsec;
148 ce->mtime.sec = st.st_mtime;
149 ce->mtime.nsec = st.st_mtim.tv_nsec;
150 ce->st_dev = st.st_dev;
151 ce->st_ino = st.st_ino;
152 ce->st_mode = st.st_mode;
153 ce->st_uid = st.st_uid;
154 ce->st_gid = st.st_gid;
155 ce->st_size = st.st_size;
156 ce->namelen = namelen;
157
158 if (index_fd(path, namelen, ce, fd, &st) < 0)
159 return -1;
160
161 return add_cache_entry(ce);
162 }
Line131:136如果打开文件失败,并且失败原因是文件不存在,这种场景相当于执行git rm
命令.这时将调用函数remove_file_from_cache
将该cache_entry
对象从管理对象active_cache
中删除,这种情况下,函数add_file_to_cache
将返回,因为文件不存在,不需要后续的逻辑进行写文件对象的处理(对应git add
命令).函数remove_file_from_cache
的实现为(update-cache.c):
/*
 * Remove the entry for path from active_cache, if present.
 *
 * cache_name_pos() returns -(index)-1 for an existing entry, so a negative
 * result is decoded back into the array index here; a non-negative result
 * (not found) leaves the cache untouched.
 *
 * NOTE(review): the function is declared int but has no return statement,
 * yet add_file_to_cache() returns its result. Also, after active_nr-- there
 * are (active_nr - pos) entries following pos, but only
 * (active_nr - pos - 1) are moved, which looks like an off-by-one; confirm.
 */
44 static int remove_file_from_cache(char *path)
45 {
46 int pos = cache_name_pos(path, strlen(path));
47 if (pos < 0) {
48 pos = -pos-1;
49 active_nr--;
50 if (pos < active_nr)
51 memmove(active_cache + pos, active_cache + pos + 1, (active_nr - pos - 1) * sizeof(struct cache_entry *));
52 }
53 }
Line46调用函数cache_name_pos
获得包含该文件路径的元素在active_cache
数组中的索引,该函数的实现为(update-cache.c):
/*
 * Binary search of active_cache[0..active_nr) for name.
 *
 * Returns -(index)-1 when an entry compares equal, otherwise the
 * non-negative index at which the name would have to be inserted. Relies
 * on active_cache being ordered consistently with cache_name_compare().
 */
23 static int cache_name_pos(const char *name, int namelen)
24 {
25 int first, last;
26
27 first = 0;
28 last = active_nr;
29 while (last > first) {
30 int next = (last + first) >> 1;
31 struct cache_entry *ce = active_cache[next];
32 int cmp = cache_name_compare(name, namelen, ce->name, ce->namelen);
33 if (!cmp)
34 return -next-1;
35 if (cmp < 0) {
36 last = next;
37 continue;
38 }
39 first = next+1;
40 }
41 return first;
42 }
注意active_cache
数组中的元素是根据文件路径名进行排序的,如果包含该文件路径的cache_entry
对象已经存在于数组之中,则返回负数(-next-1);否则返回该路径应当插入的位置(非负数).
回到函数remove_file_from_cache
,Line47:52将存在的cache_entry
对象从active_cache
数组中删除.
回到函数add_file_to_cache
,Line137:156通过系统调用fstat
获得文件内部元数据,并赋值给cache_entry
对象.Line158:159通过调用函数index_fd
写文件对象.该函数的实现为(update-cache.c):
/*
 * Compress the file behind fd into one deflate stream made of the header
 * "blob <size>" (including its terminating nul byte) followed by the file
 * content, store the SHA1 of the compressed bytes in ce->sha1, and write
 * the compressed buffer out via write_sha1_buffer().
 *
 * fd is closed here in all cases. Returns -1 if the output buffer
 * allocation or the mmap() fails, otherwise the result of
 * write_sha1_buffer().
 * NOTE(review): metadata is not checked for NULL; out, metadata and the
 * mapping are never released; path is not used in the body; deflateInit()
 * and deflate() errors are not handled beyond ending the loops.
 */
81 static int index_fd(const char *path, int namelen, struct cache_entry *ce, int fd, struct stat *st)
82 {
83 z_stream stream;
84 int max_out_bytes = namelen + st->st_size + 200;
85 void *out = malloc(max_out_bytes);
86 void *metadata = malloc(namelen + 200);
87 void *in = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
88 SHA_CTX c;
89
90 close(fd);
91 if (!out || (int)(long)in == -1)
92 return -1;
93
94 memset(&stream, 0, sizeof(stream));
95 deflateInit(&stream, Z_BEST_COMPRESSION);
96
97 /*
98 * ASCII size + nul byte
99 */
100 stream.next_in = metadata;
101 stream.avail_in = 1+sprintf(metadata, "blob %lu", (unsigned long) st->st_size);
102 stream.next_out = out;
103 stream.avail_out = max_out_bytes;
104 while (deflate(&stream, 0) == Z_OK)
105 /* nothing */;
106
107 /*
108 * File content
109 */
110 stream.next_in = in;
111 stream.avail_in = st->st_size;
112 while (deflate(&stream, Z_FINISH) == Z_OK)
113 /*nothing */;
114
115 deflateEnd(&stream);
116
/* The object name is the SHA1 of the compressed output, not of the raw file. */
117 SHA1_Init(&c);
118 SHA1_Update(&c, out, stream.total_out);
119 SHA1_Final(ce->sha1, &c);
120
121 return write_sha1_buffer(ce->sha1, out, stream.total_out);
122 }
该函数的作用就是把文件压缩,写入对应的文件对象中.Line87:92通过系统调用mmap
将文件内容映射到内存中,然后关闭文件.Line94:95初始化deflate
压缩算法.Line97:105将文件长度信息(即字符串"blob <长度>"及其结尾的nul字节)压缩写入deflate流中.Line107:115将文件内容压缩写入deflate流中,然后结束压缩.Line117:119对压缩后的数据计算SHA1.Line121调用函数write_sha1_buffer
写文件对象,该函数的实现为(read-cache.c):
/*
 * Write size bytes from buf to the object file whose name is derived from
 * sha1 by sha1_file_name().
 *
 * The file is created with O_EXCL; if it already exists the call is treated
 * as success (returns 0) and nothing is written. Returns -1 on any other
 * open() failure.
 * NOTE(review): the write() result is ignored, so a short or failed write
 * still returns 0; the local i is unused.
 */
173 int write_sha1_buffer(unsigned char *sha1, void *buf, unsigned int size)
174 {
175 char *filename = sha1_file_name(sha1);
176 int i, fd;
177
178 fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0666);
179 if (fd < 0)
180 return (errno == EEXIST) ? 0 : -1;
181 write(fd, buf, size);
182 close(fd);
183 return 0;
184 }
Line175调用函数sha1_file_name
,根据文件sha1生成文件名.该函数的实现为(read-cache.c):
/*
 * Build the object file path "<dir>/xx/yyyy..." for a 20-byte SHA1: the
 * first byte becomes the two-hex-digit subdirectory name and the remaining
 * 19 bytes become the 38-hex-digit file name.
 *
 * On the first call the buffer is allocated from DB_ENVIRONMENT (or
 * DEFAULT_DB_ENVIRONMENT when unset), zero-filled past the directory, and
 * both '/' separators are placed; later calls only overwrite the hex
 * digits in the same static buffer.
 * NOTE(review): the malloc() result is not checked.
 */
57 /*
58 * NOTE! This returns a statically allocated buffer, so you have to be
59 * careful about using it. Do a "strdup()" if you need to save the
60 * filename.
61 */
62 char *sha1_file_name(unsigned char *sha1)
63 {
64 int i;
65 static char *name, *base;
66
67 if (!base) {
68 char *sha1_file_directory = getenv(DB_ENVIRONMENT) ? : DEFAULT_DB_ENVIRONMENT;
69 int len = strlen(sha1_file_directory);
70 base = malloc(len + 60);
71 memcpy(base, sha1_file_directory, len);
72 memset(base+len, 0, 60);
73 base[len] = '/';
74 base[len+3] = '/';
75 name = base + len + 1;
76 }
77 for (i = 0; i < 20; i++) {
78 static char hex[] = "0123456789abcdef";
79 unsigned int val = sha1[i];
/* (i > 0) skips over the '/' stored at base[len+3] after the first byte. */
80 char *pos = name + i*2 + (i > 0);
81 *pos++ = hex[val >> 4];
82 *pos = hex[val & 0xf];
83 }
84 return base;
85 }
Line68:73是该文件对象的祖父目录.Line73:75此处base[len+1]
和base[len+2]
两个字符将是文件对象的父目录名字.Line77:83生成文件对象的父目录和文件对象的名字.其中sha1的20个字节被转换为40个十六进制字符,第1个字节对应的两个字符是文件对象的父目录名字,其余19个字节对应的38个字符即为文件对象的名字.
回到函数write_sha1_buffer
,Line178:182创建文件对象,写入压缩后的文件内容.
回到函数add_file_to_cache
,写入文件对象以后,Line161调用函数add_cache_entry
将该cache_entry
对象添加到管理数组中.该函数的实现为(update-cache.c):
/*
 * Insert ce into active_cache at its sorted position, or replace the
 * existing entry that has the same name. Grows the array via alloc_nr()
 * when it is full. Always returns 0.
 * NOTE(review): a replaced entry pointer is dropped without being released,
 * and the realloc() result is not checked.
 */
55 static int add_cache_entry(struct cache_entry *ce)
56 {
57 int pos;
58
59 pos = cache_name_pos(ce->name, ce->namelen);
60
61 /* existing match? Just replace it */
62 if (pos < 0) {
63 active_cache[-pos-1] = ce;
64 return 0;
65 }
66
67 /* Make sure the array is big enough .. */
68 if (active_nr == active_alloc) {
69 active_alloc = alloc_nr(active_alloc);
70 active_cache = realloc(active_cache, active_alloc * sizeof(struct cache_entry *));
71 }
72
73 /* Add it in.. */
74 active_nr++;
/* active_nr was already incremented, so (active_nr - pos - 1) is the number of old entries at or after pos. */
75 if (active_nr > pos)
76 memmove(active_cache + pos + 1, active_cache + pos, (active_nr - pos - 1) * sizeof(ce));
77 active_cache[pos] = ce;
78 return 0;
79 }
Line59根据文件路径获得该元素在管理数组中的索引.Line61:65如果元素存在,替换后返回.Line67:71如果元素个数达到容量大小,对数组进行扩容.Line73:77在管理数组中记录该元素.
回到update-cache
命令的入口函数main
,Line249调用write_cache
将内存对象的管理数组active_cache
写入临时文件,然后将临时文件重命名为index,即完成index文件的更新.函数write_cache
的实现为(update-cache.c):
/*
 * Serialize the cache to newfd: a cache_header (signature, version 1,
 * entry count, SHA1) followed by each of the entries, ce_size(ce) bytes
 * apiece.
 *
 * The SHA1 stored in the header covers the header bytes that precede the
 * sha1 field plus the bytes of every entry, so it has to be computed in a
 * first pass before anything is written.
 *
 * Returns 0 on success, -1 as soon as a write() does not write the full
 * requested length.
 */
164 static int write_cache(int newfd, struct cache_entry **cache, int entries)
165 {
166 SHA_CTX c;
167 struct cache_header hdr;
168 int i;
169
170 hdr.signature = CACHE_SIGNATURE;
171 hdr.version = 1;
172 hdr.entries = entries;
173
174 SHA1_Init(&c);
175 SHA1_Update(&c, &hdr, offsetof(struct cache_header, sha1));
176 for (i = 0; i < entries; i++) {
177 struct cache_entry *ce = cache[i];
178 int size = ce_size(ce);
179 SHA1_Update(&c, ce, size);
180 }
181 SHA1_Final(hdr.sha1, &c);
182
183 if (write(newfd, &hdr, sizeof(hdr)) != sizeof(hdr))
184 return -1;
185
186 for (i = 0; i < entries; i++) {
187 struct cache_entry *ce = cache[i];
188 int size = ce_size(ce);
189 if (write(newfd, ce, size) != size)
190 return -1;
191 }
192 return 0;
193 }
Line170:181初始化index文件元数据,即cache_header
对象内容.Line183:184写入元数据.Line186:191写入所有的cache_entry
对象内容.