Home | History | Annotate | Download | only in block
      1 /*
      2  * Block driver for the VMDK format
      3  *
      4  * Copyright (c) 2004 Fabrice Bellard
      5  * Copyright (c) 2005 Filip Navara
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "qemu-common.h"
     27 #include "block_int.h"
     28 #include "module.h"
     29 
/*
 * Magic numbers identifying the two supported image layouts. They are
 * compared against the first four bytes of the image interpreted as a
 * big-endian 32-bit integer, i.e. the files start with the characters
 * "COWD" (VMDK3) or "KDMV" (VMDK4).
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
     32 
/*
 * Header of a VMDK3 ("COWD") image. vmdk_open() reads it from the file
 * directly after the 4-byte magic and decodes the fields it uses with
 * le32_to_cpu(), so the values are stored little-endian.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* virtual disk size; becomes bs->total_sectors */
    uint32_t granularity;       /* cluster size in sectors */
    uint32_t l1dir_offset;      /* L1 table position in sectors (shifted by 9 for bytes) */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
     45 
/*
 * Header of a VMDK4 ("KDMV") image, stored directly after the 4-byte magic.
 * The structure is packed because it is read from and written to the file
 * as raw bytes; vmdk_open() decodes the fields with le32_to_cpu() and
 * le64_to_cpu(). Offsets are expressed in 512-byte sectors (the code shifts
 * them left by 9 to obtain byte positions).
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* virtual disk size in sectors */
    int64_t granularity;        /* cluster (grain) size in sectors */
    int64_t desc_offset;        /* text descriptor position; vmdk_create() writes 1 */
    int64_t desc_size;          /* text descriptor size; vmdk_create() writes 20 */
    int32_t num_gtes_per_gte;   /* number of entries in one L2 table */
    int64_t rgd_offset;         /* grain directory used as the primary L1 table */
    int64_t gd_offset;          /* grain directory used as the backup L1 table */
    int64_t grain_offset;       /* metadata size; a new image is truncated to it */
    char filler[1];
    char check_bytes[4];        /* vmdk_create() writes 0x0a 0x20 0x0d 0x0a */
} __attribute__((packed)) VMDK4Header;
     60 
/* Number of L2 tables held at once in the in-memory L2 table cache. */
#define L2_CACHE_SIZE 16
     62 
/*
 * Per-image driver state, reached through bs->opaque.
 *
 * The image is mapped with a two-level table: an L1 table whose entries
 * hold the sector offset of an L2 table, and L2 tables whose entries hold
 * the sector offset of a cluster inside the image file.
 */
typedef struct BDRVVmdkState {
    BlockDriverState *hd;               /* not referenced by the code in this file section */
    int64_t l1_table_offset;            /* byte offset of the L1 table in the file */
    int64_t l1_backup_table_offset;     /* byte offset of the backup L1 table, 0 if none */
    uint32_t *l1_table;                 /* L1 table, converted to host byte order */
    uint32_t *l1_backup_table;          /* backup L1 table, host byte order */
    unsigned int l1_size;               /* number of L1 entries */
    uint32_t l1_entry_sectors;          /* sectors covered by one L1 entry */

    unsigned int l2_size;               /* number of entries in one L2 table */
    uint32_t *l2_cache;                 /* L2_CACHE_SIZE tables, entries kept little-endian */
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];   /* L2 table offset held by each slot */
    uint32_t l2_cache_counts[L2_CACHE_SIZE];    /* hit counters used to pick a victim slot */

    unsigned int cluster_sectors;       /* cluster size in sectors */
    uint32_t parent_cid;                /* "parentCID" read from the descriptor at open */
} BDRVVmdkState;
     80 
/*
 * Description of an L2 entry that get_cluster_offset() filled in memory for
 * a newly allocated cluster and that vmdk_L2update() still has to write to
 * the image file.
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* new L2 entry value, already little-endian */
    unsigned int l1_index;      /* index of the L1 entry covering the cluster */
    unsigned int l2_index;      /* index of the entry inside its L2 table */
    unsigned int l2_offset;     /* sector offset of the L2 table in the file */
    int valid;                  /* non-zero when the fields above must be written out */
} VmdkMetaData;
     88 
     89 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
     90 {
     91     uint32_t magic;
     92 
     93     if (buf_size < 4)
     94         return 0;
     95     magic = be32_to_cpu(*(uint32_t *)buf);
     96     if (magic == VMDK3_MAGIC ||
     97         magic == VMDK4_MAGIC)
     98         return 100;
     99     else
    100         return 0;
    101 }
    102 
    103 #define CHECK_CID 1
    104 
    105 #define SECTOR_SIZE 512
    106 #define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
    107 #define HEADER_SIZE 512   			// first sector of 512 bytes
    108 
    109 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
    110 {
    111     char desc[DESC_SIZE];
    112     uint32_t cid;
    113     const char *p_name, *cid_str;
    114     size_t cid_str_size;
    115 
    116     /* the descriptor offset = 0x200 */
    117     if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    118         return 0;
    119 
    120     if (parent) {
    121         cid_str = "parentCID";
    122         cid_str_size = sizeof("parentCID");
    123     } else {
    124         cid_str = "CID";
    125         cid_str_size = sizeof("CID");
    126     }
    127 
    128     if ((p_name = strstr(desc,cid_str)) != NULL) {
    129         p_name += cid_str_size;
    130         sscanf(p_name,"%x",&cid);
    131     }
    132 
    133     return cid;
    134 }
    135 
    136 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
    137 {
    138     char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
    139     char *p_name, *tmp_str;
    140 
    141     /* the descriptor offset = 0x200 */
    142     if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    143         return -1;
    144 
    145     tmp_str = strstr(desc,"parentCID");
    146     pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
    147     if ((p_name = strstr(desc,"CID")) != NULL) {
    148         p_name += sizeof("CID");
    149         snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
    150         pstrcat(desc, sizeof(desc), tmp_desc);
    151     }
    152 
    153     if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
    154         return -1;
    155     return 0;
    156 }
    157 
    158 static int vmdk_is_cid_valid(BlockDriverState *bs)
    159 {
    160 #ifdef CHECK_CID
    161     BDRVVmdkState *s = bs->opaque;
    162     BlockDriverState *p_bs = bs->backing_hd;
    163     uint32_t cur_pcid;
    164 
    165     if (p_bs) {
    166         cur_pcid = vmdk_read_cid(p_bs,0);
    167         if (s->parent_cid != cur_pcid)
    168             // CID not valid
    169             return 0;
    170     }
    171 #endif
    172     // CID valid
    173     return 1;
    174 }
    175 
    176 static int vmdk_snapshot_create(const char *filename, const char *backing_file)
    177 {
    178     int snp_fd, p_fd;
    179     int ret;
    180     uint32_t p_cid;
    181     char *p_name, *gd_buf, *rgd_buf;
    182     const char *real_filename, *temp_str;
    183     VMDK4Header header;
    184     uint32_t gde_entries, gd_size;
    185     int64_t gd_offset, rgd_offset, capacity, gt_size;
    186     char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
    187     static const char desc_template[] =
    188     "# Disk DescriptorFile\n"
    189     "version=1\n"
    190     "CID=%x\n"
    191     "parentCID=%x\n"
    192     "createType=\"monolithicSparse\"\n"
    193     "parentFileNameHint=\"%s\"\n"
    194     "\n"
    195     "# Extent description\n"
    196     "RW %u SPARSE \"%s\"\n"
    197     "\n"
    198     "# The Disk Data Base \n"
    199     "#DDB\n"
    200     "\n";
    201 
    202     snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
    203     if (snp_fd < 0)
    204         return -errno;
    205     p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
    206     if (p_fd < 0) {
    207         close(snp_fd);
    208         return -errno;
    209     }
    210 
    211     /* read the header */
    212     if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
    213         ret = -errno;
    214         goto fail;
    215     }
    216     if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
    217         ret = -errno;
    218         goto fail;
    219     }
    220 
    221     /* write the header */
    222     if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
    223         ret = -errno;
    224         goto fail;
    225     }
    226     if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
    227         ret = -errno;
    228         goto fail;
    229     }
    230 
    231     memset(&header, 0, sizeof(header));
    232     memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
    233 
    234     if (ftruncate(snp_fd, header.grain_offset << 9)) {
    235         ret = -errno;
    236         goto fail;
    237     }
    238     /* the descriptor offset = 0x200 */
    239     if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
    240         ret = -errno;
    241         goto fail;
    242     }
    243     if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
    244         ret = -errno;
    245         goto fail;
    246     }
    247 
    248     if ((p_name = strstr(p_desc,"CID")) != NULL) {
    249         p_name += sizeof("CID");
    250         sscanf(p_name,"%x",&p_cid);
    251     }
    252 
    253     real_filename = filename;
    254     if ((temp_str = strrchr(real_filename, '\\')) != NULL)
    255         real_filename = temp_str + 1;
    256     if ((temp_str = strrchr(real_filename, '/')) != NULL)
    257         real_filename = temp_str + 1;
    258     if ((temp_str = strrchr(real_filename, ':')) != NULL)
    259         real_filename = temp_str + 1;
    260 
    261     snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
    262              (uint32_t)header.capacity, real_filename);
    263 
    264     /* write the descriptor */
    265     if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
    266         ret = -errno;
    267         goto fail;
    268     }
    269     if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
    270         ret = -errno;
    271         goto fail;
    272     }
    273 
    274     gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
    275     rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
    276     capacity = header.capacity * SECTOR_SIZE;       // Extent size
    277     /*
    278      * Each GDE span 32M disk, means:
    279      * 512 GTE per GT, each GTE points to grain
    280      */
    281     gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
    282     if (!gt_size) {
    283         ret = -EINVAL;
    284         goto fail;
    285     }
    286     gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
    287     gd_size = gde_entries * sizeof(uint32_t);
    288 
    289     /* write RGD */
    290     rgd_buf = qemu_malloc(gd_size);
    291     if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
    292         ret = -errno;
    293         goto fail_rgd;
    294     }
    295     if (read(p_fd, rgd_buf, gd_size) != gd_size) {
    296         ret = -errno;
    297         goto fail_rgd;
    298     }
    299     if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
    300         ret = -errno;
    301         goto fail_rgd;
    302     }
    303     if (write(snp_fd, rgd_buf, gd_size) == -1) {
    304         ret = -errno;
    305         goto fail_rgd;
    306     }
    307 
    308     /* write GD */
    309     gd_buf = qemu_malloc(gd_size);
    310     if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
    311         ret = -errno;
    312         goto fail_gd;
    313     }
    314     if (read(p_fd, gd_buf, gd_size) != gd_size) {
    315         ret = -errno;
    316         goto fail_gd;
    317     }
    318     if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
    319         ret = -errno;
    320         goto fail_gd;
    321     }
    322     if (write(snp_fd, gd_buf, gd_size) == -1) {
    323         ret = -errno;
    324         goto fail_gd;
    325     }
    326     ret = 0;
    327 
    328 fail_gd:
    329     qemu_free(gd_buf);
    330 fail_rgd:
    331     qemu_free(rgd_buf);
    332 fail:
    333     close(p_fd);
    334     close(snp_fd);
    335     return ret;
    336 }
    337 
    338 static int vmdk_parent_open(BlockDriverState *bs)
    339 {
    340     char *p_name;
    341     char desc[DESC_SIZE];
    342 
    343     /* the descriptor offset = 0x200 */
    344     if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    345         return -1;
    346 
    347     if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
    348         char *end_name;
    349 
    350         p_name += sizeof("parentFileNameHint") + 1;
    351         if ((end_name = strchr(p_name,'\"')) == NULL)
    352             return -1;
    353         if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
    354             return -1;
    355 
    356         pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
    357     }
    358 
    359     return 0;
    360 }
    361 
    362 static int vmdk_open(BlockDriverState *bs, int flags)
    363 {
    364     BDRVVmdkState *s = bs->opaque;
    365     uint32_t magic;
    366     int l1_size, i;
    367 
    368     if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
    369         goto fail;
    370 
    371     magic = be32_to_cpu(magic);
    372     if (magic == VMDK3_MAGIC) {
    373         VMDK3Header header;
    374 
    375         if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
    376             goto fail;
    377         s->cluster_sectors = le32_to_cpu(header.granularity);
    378         s->l2_size = 1 << 9;
    379         s->l1_size = 1 << 6;
    380         bs->total_sectors = le32_to_cpu(header.disk_sectors);
    381         s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
    382         s->l1_backup_table_offset = 0;
    383         s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
    384     } else if (magic == VMDK4_MAGIC) {
    385         VMDK4Header header;
    386 
    387         if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
    388             goto fail;
    389         bs->total_sectors = le64_to_cpu(header.capacity);
    390         s->cluster_sectors = le64_to_cpu(header.granularity);
    391         s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
    392         s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
    393         if (s->l1_entry_sectors <= 0)
    394             goto fail;
    395         s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
    396             / s->l1_entry_sectors;
    397         s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
    398         s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
    399 
    400         // try to open parent images, if exist
    401         if (vmdk_parent_open(bs) != 0)
    402             goto fail;
    403         // write the CID once after the image creation
    404         s->parent_cid = vmdk_read_cid(bs,1);
    405     } else {
    406         goto fail;
    407     }
    408 
    409     /* read the L1 table */
    410     l1_size = s->l1_size * sizeof(uint32_t);
    411     s->l1_table = qemu_malloc(l1_size);
    412     if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
    413         goto fail;
    414     for(i = 0; i < s->l1_size; i++) {
    415         le32_to_cpus(&s->l1_table[i]);
    416     }
    417 
    418     if (s->l1_backup_table_offset) {
    419         s->l1_backup_table = qemu_malloc(l1_size);
    420         if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
    421             goto fail;
    422         for(i = 0; i < s->l1_size; i++) {
    423             le32_to_cpus(&s->l1_backup_table[i]);
    424         }
    425     }
    426 
    427     s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    428     return 0;
    429  fail:
    430     qemu_free(s->l1_backup_table);
    431     qemu_free(s->l1_table);
    432     qemu_free(s->l2_cache);
    433     return -1;
    434 }
    435 
/* Forward declaration; the function is defined further down in this file. */
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
                                   uint64_t offset, int allocate);
    438 
    439 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
    440                              uint64_t offset, int allocate)
    441 {
    442     BDRVVmdkState *s = bs->opaque;
    443     uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
    444 
    445     // we will be here if it's first write on non-exist grain(cluster).
    446     // try to read from parent image, if exist
    447     if (bs->backing_hd) {
    448         int ret;
    449 
    450         if (!vmdk_is_cid_valid(bs))
    451             return -1;
    452 
    453         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
    454             s->cluster_sectors);
    455         if (ret < 0) {
    456             return -1;
    457         }
    458 
    459         //Write grain only into the active image
    460         ret = bdrv_write(bs->file, cluster_offset, whole_grain,
    461             s->cluster_sectors);
    462         if (ret < 0) {
    463             return -1;
    464         }
    465     }
    466     return 0;
    467 }
    468 
    469 static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
    470 {
    471     BDRVVmdkState *s = bs->opaque;
    472 
    473     /* update L2 table */
    474     if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
    475                     &(m_data->offset), sizeof(m_data->offset)) < 0)
    476         return -1;
    477     /* update backup L2 table */
    478     if (s->l1_backup_table_offset != 0) {
    479         m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
    480         if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
    481                         &(m_data->offset), sizeof(m_data->offset)) < 0)
    482             return -1;
    483     }
    484 
    485     return 0;
    486 }
    487 
    488 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
    489                                    uint64_t offset, int allocate)
    490 {
    491     BDRVVmdkState *s = bs->opaque;
    492     unsigned int l1_index, l2_offset, l2_index;
    493     int min_index, i, j;
    494     uint32_t min_count, *l2_table, tmp = 0;
    495     uint64_t cluster_offset;
    496 
    497     if (m_data)
    498         m_data->valid = 0;
    499 
    500     l1_index = (offset >> 9) / s->l1_entry_sectors;
    501     if (l1_index >= s->l1_size)
    502         return 0;
    503     l2_offset = s->l1_table[l1_index];
    504     if (!l2_offset)
    505         return 0;
    506     for(i = 0; i < L2_CACHE_SIZE; i++) {
    507         if (l2_offset == s->l2_cache_offsets[i]) {
    508             /* increment the hit count */
    509             if (++s->l2_cache_counts[i] == 0xffffffff) {
    510                 for(j = 0; j < L2_CACHE_SIZE; j++) {
    511                     s->l2_cache_counts[j] >>= 1;
    512                 }
    513             }
    514             l2_table = s->l2_cache + (i * s->l2_size);
    515             goto found;
    516         }
    517     }
    518     /* not found: load a new entry in the least used one */
    519     min_index = 0;
    520     min_count = 0xffffffff;
    521     for(i = 0; i < L2_CACHE_SIZE; i++) {
    522         if (s->l2_cache_counts[i] < min_count) {
    523             min_count = s->l2_cache_counts[i];
    524             min_index = i;
    525         }
    526     }
    527     l2_table = s->l2_cache + (min_index * s->l2_size);
    528     if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
    529                                                                         s->l2_size * sizeof(uint32_t))
    530         return 0;
    531 
    532     s->l2_cache_offsets[min_index] = l2_offset;
    533     s->l2_cache_counts[min_index] = 1;
    534  found:
    535     l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
    536     cluster_offset = le32_to_cpu(l2_table[l2_index]);
    537 
    538     if (!cluster_offset) {
    539         if (!allocate)
    540             return 0;
    541 
    542         // Avoid the L2 tables update for the images that have snapshots.
    543         cluster_offset = bdrv_getlength(bs->file);
    544         bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
    545 
    546         cluster_offset >>= 9;
    547         tmp = cpu_to_le32(cluster_offset);
    548         l2_table[l2_index] = tmp;
    549 
    550         /* First of all we write grain itself, to avoid race condition
    551          * that may to corrupt the image.
    552          * This problem may occur because of insufficient space on host disk
    553          * or inappropriate VM shutdown.
    554          */
    555         if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
    556             return 0;
    557 
    558         if (m_data) {
    559             m_data->offset = tmp;
    560             m_data->l1_index = l1_index;
    561             m_data->l2_index = l2_index;
    562             m_data->l2_offset = l2_offset;
    563             m_data->valid = 1;
    564         }
    565     }
    566     cluster_offset <<= 9;
    567     return cluster_offset;
    568 }
    569 
    570 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
    571                              int nb_sectors, int *pnum)
    572 {
    573     BDRVVmdkState *s = bs->opaque;
    574     int index_in_cluster, n;
    575     uint64_t cluster_offset;
    576 
    577     cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
    578     index_in_cluster = sector_num % s->cluster_sectors;
    579     n = s->cluster_sectors - index_in_cluster;
    580     if (n > nb_sectors)
    581         n = nb_sectors;
    582     *pnum = n;
    583     return (cluster_offset != 0);
    584 }
    585 
    586 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
    587                     uint8_t *buf, int nb_sectors)
    588 {
    589     BDRVVmdkState *s = bs->opaque;
    590     int index_in_cluster, n, ret;
    591     uint64_t cluster_offset;
    592 
    593     while (nb_sectors > 0) {
    594         cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
    595         index_in_cluster = sector_num % s->cluster_sectors;
    596         n = s->cluster_sectors - index_in_cluster;
    597         if (n > nb_sectors)
    598             n = nb_sectors;
    599         if (!cluster_offset) {
    600             // try to read from parent image, if exist
    601             if (bs->backing_hd) {
    602                 if (!vmdk_is_cid_valid(bs))
    603                     return -1;
    604                 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
    605                 if (ret < 0)
    606                     return -1;
    607             } else {
    608                 memset(buf, 0, 512 * n);
    609             }
    610         } else {
    611             if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
    612                 return -1;
    613         }
    614         nb_sectors -= n;
    615         sector_num += n;
    616         buf += n * 512;
    617     }
    618     return 0;
    619 }
    620 
    621 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
    622                      const uint8_t *buf, int nb_sectors)
    623 {
    624     BDRVVmdkState *s = bs->opaque;
    625     VmdkMetaData m_data;
    626     int index_in_cluster, n;
    627     uint64_t cluster_offset;
    628     static int cid_update = 0;
    629 
    630     if (sector_num > bs->total_sectors) {
    631         fprintf(stderr,
    632                 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
    633                 " total_sectors=0x%" PRIx64 "\n",
    634                 sector_num, bs->total_sectors);
    635         return -1;
    636     }
    637 
    638     while (nb_sectors > 0) {
    639         index_in_cluster = sector_num & (s->cluster_sectors - 1);
    640         n = s->cluster_sectors - index_in_cluster;
    641         if (n > nb_sectors)
    642             n = nb_sectors;
    643         cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
    644         if (!cluster_offset)
    645             return -1;
    646 
    647         if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
    648             return -1;
    649         if (m_data.valid) {
    650             /* update L2 tables */
    651             if (vmdk_L2update(bs, &m_data) == -1)
    652                 return -1;
    653         }
    654         nb_sectors -= n;
    655         sector_num += n;
    656         buf += n * 512;
    657 
    658         // update CID on the first write every time the virtual disk is opened
    659         if (!cid_update) {
    660             vmdk_write_cid(bs, time(NULL));
    661             cid_update++;
    662         }
    663     }
    664     return 0;
    665 }
    666 
    667 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
    668 {
    669     int fd, i;
    670     VMDK4Header header;
    671     uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
    672     static const char desc_template[] =
    673         "# Disk DescriptorFile\n"
    674         "version=1\n"
    675         "CID=%x\n"
    676         "parentCID=ffffffff\n"
    677         "createType=\"monolithicSparse\"\n"
    678         "\n"
    679         "# Extent description\n"
    680         "RW %" PRId64 " SPARSE \"%s\"\n"
    681         "\n"
    682         "# The Disk Data Base \n"
    683         "#DDB\n"
    684         "\n"
    685         "ddb.virtualHWVersion = \"%d\"\n"
    686         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
    687         "ddb.geometry.heads = \"16\"\n"
    688         "ddb.geometry.sectors = \"63\"\n"
    689         "ddb.adapterType = \"ide\"\n";
    690     char desc[1024];
    691     const char *real_filename, *temp_str;
    692     int64_t total_size = 0;
    693     const char *backing_file = NULL;
    694     int flags = 0;
    695     int ret;
    696 
    697     // Read out options
    698     while (options && options->name) {
    699         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
    700             total_size = options->value.n / 512;
    701         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
    702             backing_file = options->value.s;
    703         } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
    704             flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
    705         }
    706         options++;
    707     }
    708 
    709     /* XXX: add support for backing file */
    710     if (backing_file) {
    711         return vmdk_snapshot_create(filename, backing_file);
    712     }
    713 
    714     fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
    715               0644);
    716     if (fd < 0)
    717         return -errno;
    718     magic = cpu_to_be32(VMDK4_MAGIC);
    719     memset(&header, 0, sizeof(header));
    720     header.version = cpu_to_le32(1);
    721     header.flags = cpu_to_le32(3); /* ?? */
    722     header.capacity = cpu_to_le64(total_size);
    723     header.granularity = cpu_to_le64(128);
    724     header.num_gtes_per_gte = cpu_to_le32(512);
    725 
    726     grains = (total_size + header.granularity - 1) / header.granularity;
    727     gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
    728     gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
    729     gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
    730 
    731     header.desc_offset = 1;
    732     header.desc_size = 20;
    733     header.rgd_offset = header.desc_offset + header.desc_size;
    734     header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
    735     header.grain_offset =
    736        ((header.gd_offset + gd_size + (gt_size * gt_count) +
    737          header.granularity - 1) / header.granularity) *
    738         header.granularity;
    739 
    740     header.desc_offset = cpu_to_le64(header.desc_offset);
    741     header.desc_size = cpu_to_le64(header.desc_size);
    742     header.rgd_offset = cpu_to_le64(header.rgd_offset);
    743     header.gd_offset = cpu_to_le64(header.gd_offset);
    744     header.grain_offset = cpu_to_le64(header.grain_offset);
    745 
    746     header.check_bytes[0] = 0xa;
    747     header.check_bytes[1] = 0x20;
    748     header.check_bytes[2] = 0xd;
    749     header.check_bytes[3] = 0xa;
    750 
    751     /* write all the data */
    752     ret = qemu_write_full(fd, &magic, sizeof(magic));
    753     if (ret != sizeof(magic)) {
    754         ret = -errno;
    755         goto exit;
    756     }
    757     ret = qemu_write_full(fd, &header, sizeof(header));
    758     if (ret != sizeof(header)) {
    759         ret = -errno;
    760         goto exit;
    761     }
    762 
    763     ret = ftruncate(fd, header.grain_offset << 9);
    764     if (ret < 0) {
    765         ret = -errno;
    766         goto exit;
    767     }
    768 
    769     /* write grain directory */
    770     lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
    771     for (i = 0, tmp = header.rgd_offset + gd_size;
    772          i < gt_count; i++, tmp += gt_size) {
    773         ret = qemu_write_full(fd, &tmp, sizeof(tmp));
    774         if (ret != sizeof(tmp)) {
    775             ret = -errno;
    776             goto exit;
    777         }
    778     }
    779 
    780     /* write backup grain directory */
    781     lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
    782     for (i = 0, tmp = header.gd_offset + gd_size;
    783          i < gt_count; i++, tmp += gt_size) {
    784         ret = qemu_write_full(fd, &tmp, sizeof(tmp));
    785         if (ret != sizeof(tmp)) {
    786             ret = -errno;
    787             goto exit;
    788         }
    789     }
    790 
    791     /* compose the descriptor */
    792     real_filename = filename;
    793     if ((temp_str = strrchr(real_filename, '\\')) != NULL)
    794         real_filename = temp_str + 1;
    795     if ((temp_str = strrchr(real_filename, '/')) != NULL)
    796         real_filename = temp_str + 1;
    797     if ((temp_str = strrchr(real_filename, ':')) != NULL)
    798         real_filename = temp_str + 1;
    799     snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
    800              total_size, real_filename,
    801              (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
    802              total_size / (int64_t)(63 * 16));
    803 
    804     /* write the descriptor */
    805     lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
    806     ret = qemu_write_full(fd, desc, strlen(desc));
    807     if (ret != strlen(desc)) {
    808         ret = -errno;
    809         goto exit;
    810     }
    811 
    812     ret = 0;
    813 exit:
    814     close(fd);
    815     return ret;
    816 }
    817 
    818 static void vmdk_close(BlockDriverState *bs)
    819 {
    820     BDRVVmdkState *s = bs->opaque;
    821 
    822     qemu_free(s->l1_table);
    823     qemu_free(s->l2_cache);
    824 }
    825 
    826 static void vmdk_flush(BlockDriverState *bs)
    827 {
    828     bdrv_flush(bs->file);
    829 }
    830 
    831 
    832 static QEMUOptionParameter vmdk_create_options[] = {
    833     {
    834         .name = BLOCK_OPT_SIZE,
    835         .type = OPT_SIZE,
    836         .help = "Virtual disk size"
    837     },
    838     {
    839         .name = BLOCK_OPT_BACKING_FILE,
    840         .type = OPT_STRING,
    841         .help = "File name of a base image"
    842     },
    843     {
    844         .name = BLOCK_OPT_COMPAT6,
    845         .type = OPT_FLAG,
    846         .help = "VMDK version 6 image"
    847     },
    848     { NULL }
    849 };
    850 
    851 static BlockDriver bdrv_vmdk = {
    852     .format_name	= "vmdk",
    853     .instance_size	= sizeof(BDRVVmdkState),
    854     .bdrv_probe		= vmdk_probe,
    855     .bdrv_open      = vmdk_open,
    856     .bdrv_read		= vmdk_read,
    857     .bdrv_write		= vmdk_write,
    858     .bdrv_close		= vmdk_close,
    859     .bdrv_create	= vmdk_create,
    860     .bdrv_flush		= vmdk_flush,
    861     .bdrv_is_allocated	= vmdk_is_allocated,
    862 
    863     .create_options = vmdk_create_options,
    864 };
    865 
    866 static void bdrv_vmdk_init(void)
    867 {
    868     bdrv_register(&bdrv_vmdk);
    869 }
    870 
    871 block_init(bdrv_vmdk_init);
    872