1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24: #include "qemu-common.h"
25: #include "block_int.h"
26: #include <zlib.h>
27: #include "aes.h"
28: #include <assert.h>
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49: #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
50: #define QCOW_VERSION 2
51:
52: #define QCOW_CRYPT_NONE 0
53: #define QCOW_CRYPT_AES 1
54:
55:
56: #define QCOW_OFLAG_COPIED (1LL << 63)
57:
58: #define QCOW_OFLAG_COMPRESSED (1LL << 62)
59:
60: #define REFCOUNT_SHIFT 1
61:
62: #ifndef offsetof
63: #define offsetof(type, field) ((size_t) &((type *)0)->field)
64: #endif
65:
66: typedef struct QCowHeader {
67: uint32_t magic;
68: uint32_t version;
69: uint64_t backing_file_offset;
70: uint32_t backing_file_size;
71: uint32_t cluster_bits;
72: uint64_t size;
73: uint32_t crypt_method;
74: uint32_t l1_size;
75: uint64_t l1_table_offset;
76: uint64_t refcount_table_offset;
77: uint32_t refcount_table_clusters;
78: uint32_t nb_snapshots;
79: uint64_t snapshots_offset;
80: } QCowHeader;
81:
82: typedef struct __attribute__((packed)) QCowSnapshotHeader {
83:
84: uint64_t l1_table_offset;
85:
86: uint32_t l1_size;
87: uint16_t id_str_size;
88: uint16_t name_size;
89:
90: uint32_t date_sec;
91: uint32_t date_nsec;
92:
93: uint64_t vm_clock_nsec;
94:
95: uint32_t vm_state_size;
96: uint32_t extra_data_size;
97:
98:
99:
100: } QCowSnapshotHeader;
101:
102: #define L2_CACHE_SIZE 16
103:
104: typedef struct QCowSnapshot {
105: uint64_t l1_table_offset;
106: uint32_t l1_size;
107: char *id_str;
108: char *name;
109: uint32_t vm_state_size;
110: uint32_t date_sec;
111: uint32_t date_nsec;
112: uint64_t vm_clock_nsec;
113: } QCowSnapshot;
114:
115: typedef struct BDRVQcowState {
116: BlockDriverState *hd;
117: int cluster_bits;
118: int cluster_size;
119: int cluster_sectors;
120: int l2_bits;
121: int l2_size;
122: int l1_size;
123: int l1_vm_state_index;
124: int csize_shift;
125: int csize_mask;
126: uint64_t cluster_offset_mask;
127: uint64_t l1_table_offset;
128: uint64_t *l1_table;
129: uint64_t *l2_cache;
130: uint64_t l2_cache_offsets[L2_CACHE_SIZE];
131: uint32_t l2_cache_counts[L2_CACHE_SIZE];
132: uint8_t *cluster_cache;
133: uint8_t *cluster_data;
134: uint64_t cluster_cache_offset;
135:
136: uint64_t *refcount_table;
137: uint64_t refcount_table_offset;
138: uint32_t refcount_table_size;
139: uint64_t refcount_block_cache_offset;
140: uint16_t *refcount_block_cache;
141: int64_t free_cluster_index;
142: int64_t free_byte_offset;
143:
144: uint32_t crypt_method;
145: uint32_t crypt_method_header;
146: AES_KEY aes_encrypt_key;
147: AES_KEY aes_decrypt_key;
148: uint64_t snapshots_offset;
149: int snapshots_size;
150: int nb_snapshots;
151: QCowSnapshot *snapshots;
152: } BDRVQcowState;
153:
154: static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
155: static int qcow_read(BlockDriverState *bs, int64_t sector_num,
156: uint8_t *buf, int nb_sectors);
157: static int qcow_read_snapshots(BlockDriverState *bs);
158: static void qcow_free_snapshots(BlockDriverState *bs);
159: static int refcount_init(BlockDriverState *bs);
160: static void refcount_close(BlockDriverState *bs);
161: static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
162: static int update_cluster_refcount(BlockDriverState *bs,
163: int64_t cluster_index,
164: int addend);
165: static void update_refcount(BlockDriverState *bs,
166: int64_t offset, int64_t length,
167: int addend);
168: static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
169: static int64_t alloc_bytes(BlockDriverState *bs, int size);
170: static void free_clusters(BlockDriverState *bs,
171: int64_t offset, int64_t size);
172: #ifdef DEBUG_ALLOC
173: static void check_refcounts(BlockDriverState *bs);
174: #endif
175:
176: static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
177: {
178: const QCowHeader *cow_header = (const void *)buf;
179:
180: if (buf_size >= sizeof(QCowHeader) &&
181: be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
182: be32_to_cpu(cow_header->version) == QCOW_VERSION)
183: return 100;
184: else
185: return 0;
186: }
187:
188: static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
189: {
190: BDRVQcowState *s = bs->opaque;
191: int len, i, shift, ret;
192: QCowHeader header;
193:
194: ret = bdrv_file_open(&s->hd, filename, flags);
195: if (ret < 0)
196: return ret;
197: if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
198: goto fail;
199: be32_to_cpus(&header.magic);
200: be32_to_cpus(&header.version);
201: be64_to_cpus(&header.backing_file_offset);
202: be32_to_cpus(&header.backing_file_size);
203: be64_to_cpus(&header.size);
204: be32_to_cpus(&header.cluster_bits);
205: be32_to_cpus(&header.crypt_method);
206: be64_to_cpus(&header.l1_table_offset);
207: be32_to_cpus(&header.l1_size);
208: be64_to_cpus(&header.refcount_table_offset);
209: be32_to_cpus(&header.refcount_table_clusters);
210: be64_to_cpus(&header.snapshots_offset);
211: be32_to_cpus(&header.nb_snapshots);
212:
213: if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
214: goto fail;
215: if (header.size <= 1 ||
216: header.cluster_bits < 9 ||
217: header.cluster_bits > 16)
218: goto fail;
219: if (header.crypt_method > QCOW_CRYPT_AES)
220: goto fail;
221: s->crypt_method_header = header.crypt_method;
222: if (s->crypt_method_header)
223: bs->encrypted = 1;
224: s->cluster_bits = header.cluster_bits;
225: s->cluster_size = 1 << s->cluster_bits;
226: s->cluster_sectors = 1 << (s->cluster_bits - 9);
227: s->l2_bits = s->cluster_bits - 3;
228: s->l2_size = 1 << s->l2_bits;
229: bs->total_sectors = header.size / 512;
230: s->csize_shift = (62 - (s->cluster_bits - 8));
231: s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
232: s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
233: s->refcount_table_offset = header.refcount_table_offset;
234: s->refcount_table_size =
235: header.refcount_table_clusters << (s->cluster_bits - 3);
236:
237: s->snapshots_offset = header.snapshots_offset;
238: s->nb_snapshots = header.nb_snapshots;
239:
240:
241: s->l1_size = header.l1_size;
242: shift = s->cluster_bits + s->l2_bits;
243: s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift;
244:
245:
246: if (s->l1_size < s->l1_vm_state_index)
247: goto fail;
248: s->l1_table_offset = header.l1_table_offset;
249: s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
250: if (!s->l1_table)
251: goto fail;
252: if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
253: s->l1_size * sizeof(uint64_t))
254: goto fail;
255: for(i = 0;i < s->l1_size; i++) {
256: be64_to_cpus(&s->l1_table[i]);
257: }
258:
259: s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
260: if (!s->l2_cache)
261: goto fail;
262: s->cluster_cache = qemu_malloc(s->cluster_size);
263: if (!s->cluster_cache)
264: goto fail;
265:
266: s->cluster_data = qemu_malloc(s->cluster_size + 512);
267: if (!s->cluster_data)
268: goto fail;
269: s->cluster_cache_offset = -1;
270:
271: if (refcount_init(bs) < 0)
272: goto fail;
273:
274:
275: if (header.backing_file_offset != 0) {
276: len = header.backing_file_size;
277: if (len > 1023)
278: len = 1023;
279: if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
280: goto fail;
281: bs->backing_file[len] = '\0';
282: }
283: if (qcow_read_snapshots(bs) < 0)
284: goto fail;
285:
286: #ifdef DEBUG_ALLOC
287: check_refcounts(bs);
288: #endif
289: return 0;
290:
291: fail:
292: qcow_free_snapshots(bs);
293: refcount_close(bs);
294: qemu_free(s->l1_table);
295: qemu_free(s->l2_cache);
296: qemu_free(s->cluster_cache);
297: qemu_free(s->cluster_data);
298: bdrv_delete(s->hd);
299: return -1;
300: }
301:
302: static int qcow_set_key(BlockDriverState *bs, const char *key)
303: {
304: BDRVQcowState *s = bs->opaque;
305: uint8_t keybuf[16];
306: int len, i;
307:
308: memset(keybuf, 0, 16);
309: len = strlen(key);
310: if (len > 16)
311: len = 16;
312:
313:
314: for(i = 0;i < len;i++) {
315: keybuf[i] = key[i];
316: }
317: s->crypt_method = s->crypt_method_header;
318:
319: if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
320: return -1;
321: if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
322: return -1;
323: #if 0
324:
325: {
326: uint8_t in[16];
327: uint8_t out[16];
328: uint8_t tmp[16];
329: for(i=0;i<16;i++)
330: in[i] = i;
331: AES_encrypt(in, tmp, &s->aes_encrypt_key);
332: AES_decrypt(tmp, out, &s->aes_decrypt_key);
333: for(i = 0; i < 16; i++)
334: printf(" %02x", tmp[i]);
335: printf("\n");
336: for(i = 0; i < 16; i++)
337: printf(" %02x", out[i]);
338: printf("\n");
339: }
340: #endif
341: return 0;
342: }
343:
344:
345:
346:
347: static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
348: uint8_t *out_buf, const uint8_t *in_buf,
349: int nb_sectors, int enc,
350: const AES_KEY *key)
351: {
352: union {
353: uint64_t ll[2];
354: uint8_t b[16];
355: } ivec;
356: int i;
357:
358: for(i = 0; i < nb_sectors; i++) {
359: ivec.ll[0] = cpu_to_le64(sector_num);
360: ivec.ll[1] = 0;
361: AES_cbc_encrypt(in_buf, out_buf, 512, key,
362: ivec.b, enc);
363: sector_num++;
364: in_buf += 512;
365: out_buf += 512;
366: }
367: }
368:
369: static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
370: uint64_t cluster_offset, int n_start, int n_end)
371: {
372: BDRVQcowState *s = bs->opaque;
373: int n, ret;
374:
375: n = n_end - n_start;
376: if (n <= 0)
377: return 0;
378: ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n);
379: if (ret < 0)
380: return ret;
381: if (s->crypt_method) {
382: encrypt_sectors(s, start_sect + n_start,
383: s->cluster_data,
384: s->cluster_data, n, 1,
385: &s->aes_encrypt_key);
386: }
387: ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start,
388: s->cluster_data, n);
389: if (ret < 0)
390: return ret;
391: return 0;
392: }
393:
394: static void l2_cache_reset(BlockDriverState *bs)
395: {
396: BDRVQcowState *s = bs->opaque;
397:
398: memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
399: memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
400: memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
401: }
402:
403: static inline int l2_cache_new_entry(BlockDriverState *bs)
404: {
405: BDRVQcowState *s = bs->opaque;
406: uint32_t min_count;
407: int min_index, i;
408:
409:
410: min_index = 0;
411: min_count = 0xffffffff;
412: for(i = 0; i < L2_CACHE_SIZE; i++) {
413: if (s->l2_cache_counts[i] < min_count) {
414: min_count = s->l2_cache_counts[i];
415: min_index = i;
416: }
417: }
418: return min_index;
419: }
420:
421: static int64_t align_offset(int64_t offset, int n)
422: {
423: offset = (offset + n - 1) & ~(n - 1);
424: return offset;
425: }
426:
427: static int grow_l1_table(BlockDriverState *bs, int min_size)
428: {
429: BDRVQcowState *s = bs->opaque;
430: int new_l1_size, new_l1_size2, ret, i;
431: uint64_t *new_l1_table;
432: uint64_t new_l1_table_offset;
433: uint64_t data64;
434: uint32_t data32;
435:
436: new_l1_size = s->l1_size;
437: if (min_size <= new_l1_size)
438: return 0;
439: while (min_size > new_l1_size) {
440: new_l1_size = (new_l1_size * 3 + 1) / 2;
441: }
442: #ifdef DEBUG_ALLOC2
443: printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
444: #endif
445:
446: new_l1_size2 = sizeof(uint64_t) * new_l1_size;
447: new_l1_table = qemu_mallocz(new_l1_size2);
448: if (!new_l1_table)
449: return -ENOMEM;
450: memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
451:
452:
453: new_l1_table_offset = alloc_clusters(bs, new_l1_size2);
454:
455: for(i = 0; i < s->l1_size; i++)
456: new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
457: ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2);
458: if (ret != new_l1_size2)
459: goto fail;
460: for(i = 0; i < s->l1_size; i++)
461: new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
462:
463:
464: data64 = cpu_to_be64(new_l1_table_offset);
465: if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_table_offset),
466: &data64, sizeof(data64)) != sizeof(data64))
467: goto fail;
468: data32 = cpu_to_be32(new_l1_size);
469: if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size),
470: &data32, sizeof(data32)) != sizeof(data32))
471: goto fail;
472: qemu_free(s->l1_table);
473: free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
474: s->l1_table_offset = new_l1_table_offset;
475: s->l1_table = new_l1_table;
476: s->l1_size = new_l1_size;
477: return 0;
478: fail:
479: qemu_free(s->l1_table);
480: return -EIO;
481: }
482:
483:
484:
485:
486:
487:
488:
489:
490:
491:
492:
493:
494:
495:
496: static uint64_t get_cluster_offset(BlockDriverState *bs,
497: uint64_t offset, int allocate,
498: int compressed_size,
499: int n_start, int n_end)
500: {
501: BDRVQcowState *s = bs->opaque;
502: int min_index, i, j, l1_index, l2_index, ret;
503: uint64_t l2_offset, *l2_table, cluster_offset, tmp, old_l2_offset;
504:
505: l1_index = offset >> (s->l2_bits + s->cluster_bits);
506: if (l1_index >= s->l1_size) {
507:
508: if (!allocate)
509: return 0;
510: if (grow_l1_table(bs, l1_index + 1) < 0)
511: return 0;
512: }
513: l2_offset = s->l1_table[l1_index];
514: if (!l2_offset) {
515: if (!allocate)
516: return 0;
517: l2_allocate:
518: old_l2_offset = l2_offset;
519:
520: l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
521:
522: s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
523: tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED);
524: if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
525: &tmp, sizeof(tmp)) != sizeof(tmp))
526: return 0;
527: min_index = l2_cache_new_entry(bs);
528: l2_table = s->l2_cache + (min_index << s->l2_bits);
529:
530: if (old_l2_offset == 0) {
531: memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
532: } else {
533: if (bdrv_pread(s->hd, old_l2_offset,
534: l2_table, s->l2_size * sizeof(uint64_t)) !=
535: s->l2_size * sizeof(uint64_t))
536: return 0;
537: }
538: if (bdrv_pwrite(s->hd, l2_offset,
539: l2_table, s->l2_size * sizeof(uint64_t)) !=
540: s->l2_size * sizeof(uint64_t))
541: return 0;
542: } else {
543: if (!(l2_offset &a