Home | History | Annotate | Download | only in block
      1 /*
      2  * Block driver for the VMDK format
      3  *
      4  * Copyright (c) 2004 Fabrice Bellard
      5  * Copyright (c) 2005 Filip Navara
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "qemu-common.h"
     27 #include "block_int.h"
     28 #include "module.h"
     29 
/*
 * Magic numbers identifying the two supported image flavours.  Each value is
 * what the first four bytes of the file ("COWD" for VMDK3, "KDMV" for VMDK4)
 * yield when they are read as one big-endian 32-bit integer.
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
     32 
/*
 * VMDK3 ("COWD") header as vmdk_open() reads it from the image, starting
 * right after the 4-byte magic.  vmdk_open() converts the fields it uses
 * with le32_to_cpu().
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* becomes bs->total_sectors */
    uint32_t granularity;       /* becomes cluster_sectors */
    uint32_t l1dir_offset;      /* L1 table position in sectors (shifted by 9 on load) */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
     45 
/*
 * VMDK4 ("KDMV") header as read from / written to the image, starting right
 * after the 4-byte magic.  The struct is packed so that its in-memory layout
 * has no padding.  vmdk_open() converts the fields it uses from
 * little-endian.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* becomes bs->total_sectors */
    int64_t granularity;        /* becomes cluster_sectors */
    int64_t desc_offset;        /* vmdk_create() stores 1 here */
    int64_t desc_size;          /* vmdk_create() stores 20 here */
    int32_t num_gtes_per_gte;   /* becomes l2_size (entries per L2 table) */
    int64_t rgd_offset;         /* in sectors; loaded as the primary L1 table */
    int64_t gd_offset;          /* in sectors; loaded as the backup L1 table */
    int64_t grain_offset;       /* in sectors; vmdk_create() truncates the file to this */
    char filler[1];
    char check_bytes[4];        /* vmdk_create() stores 0x0a 0x20 0x0d 0x0a */
} __attribute__((packed)) VMDK4Header;
     60 
/* Number of L2 tables kept in memory by get_cluster_offset(). */
#define L2_CACHE_SIZE 16

/* Per-image driver state (bs->opaque). */
typedef struct BDRVVmdkState {
    BlockDriverState *hd;               /* underlying file, opened with bdrv_file_open() */
    int64_t l1_table_offset;            /* byte offset of the L1 table in the file */
    int64_t l1_backup_table_offset;     /* byte offset of the backup L1 table, 0 if none */
    uint32_t *l1_table;                 /* L1 table, converted to host byte order on load */
    uint32_t *l1_backup_table;          /* backup L1 table, only loaded when its offset != 0 */
    unsigned int l1_size;               /* number of L1 entries */
    uint32_t l1_entry_sectors;          /* sectors covered by one L1 entry (l2_size * cluster_sectors) */

    unsigned int l2_size;               /* number of entries in one L2 table */
    uint32_t *l2_cache;                 /* L2_CACHE_SIZE tables of l2_size entries each */
    uint32_t l2_cache_offsets[L2_CACHE_SIZE];   /* sector offset of the table held in each slot */
    uint32_t l2_cache_counts[L2_CACHE_SIZE];    /* hit counter of each slot, used for eviction */

    unsigned int cluster_sectors;       /* sectors per cluster (grain) */
    uint32_t parent_cid;                /* parentCID read from the descriptor in vmdk_open() */
    int is_parent;                      /* set by vmdk_open() when opened as a backing image */
} BDRVVmdkState;
     81 
/*
 * Description of a freshly allocated cluster.  get_cluster_offset() fills it
 * in and vmdk_L2update() uses it to write the new L2 entry to the image.
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* L2 entry value, already converted with cpu_to_le32() */
    unsigned int l1_index;      /* index of the L1 entry covering the cluster */
    unsigned int l2_index;      /* index of the entry inside its L2 table */
    unsigned int l2_offset;     /* sector offset of that L2 table in the file */
    int valid;                  /* non-zero when the other fields have been filled in */
} VmdkMetaData;
     89 
/*
 * Image and cluster most recently selected for allocation by
 * get_cluster_offset(); get_whole_cluster() writes the grain copied from a
 * backing image to this location.
 */
typedef struct ActiveBDRVState{
    BlockDriverState *hd;            // image that received the new cluster
    uint64_t cluster_offset;         // sector offset of the new cluster in that image
}ActiveBDRVState;

/* Single file-scope instance: it is shared by every image this driver has open. */
static ActiveBDRVState activeBDRV;
     96 
     97 
     98 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
     99 {
    100     uint32_t magic;
    101 
    102     if (buf_size < 4)
    103         return 0;
    104     magic = be32_to_cpu(*(uint32_t *)buf);
    105     if (magic == VMDK3_MAGIC ||
    106         magic == VMDK4_MAGIC)
    107         return 100;
    108     else
    109         return 0;
    110 }
    111 
    112 #define CHECK_CID 1
    113 
    114 #define SECTOR_SIZE 512
    115 #define DESC_SIZE 20*SECTOR_SIZE	// 20 sectors of 512 bytes each
    116 #define HEADER_SIZE 512   			// first sector of 512 bytes
    117 
    118 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
    119 {
    120     BDRVVmdkState *s = bs->opaque;
    121     char desc[DESC_SIZE];
    122     uint32_t cid;
    123     const char *p_name, *cid_str;
    124     size_t cid_str_size;
    125 
    126     /* the descriptor offset = 0x200 */
    127     if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    128         return 0;
    129 
    130     if (parent) {
    131         cid_str = "parentCID";
    132         cid_str_size = sizeof("parentCID");
    133     } else {
    134         cid_str = "CID";
    135         cid_str_size = sizeof("CID");
    136     }
    137 
    138     if ((p_name = strstr(desc,cid_str)) != NULL) {
    139         p_name += cid_str_size;
    140         sscanf(p_name,"%x",&cid);
    141     }
    142 
    143     return cid;
    144 }
    145 
    146 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
    147 {
    148     BDRVVmdkState *s = bs->opaque;
    149     char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
    150     char *p_name, *tmp_str;
    151 
    152     /* the descriptor offset = 0x200 */
    153     if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    154         return -1;
    155 
    156     tmp_str = strstr(desc,"parentCID");
    157     pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
    158     if ((p_name = strstr(desc,"CID")) != NULL) {
    159         p_name += sizeof("CID");
    160         snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
    161         pstrcat(desc, sizeof(desc), tmp_desc);
    162     }
    163 
    164     if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    165         return -1;
    166     return 0;
    167 }
    168 
    169 static int vmdk_is_cid_valid(BlockDriverState *bs)
    170 {
    171 #ifdef CHECK_CID
    172     BDRVVmdkState *s = bs->opaque;
    173     BlockDriverState *p_bs = s->hd->backing_hd;
    174     uint32_t cur_pcid;
    175 
    176     if (p_bs) {
    177         cur_pcid = vmdk_read_cid(p_bs,0);
    178         if (s->parent_cid != cur_pcid)
    179             // CID not valid
    180             return 0;
    181     }
    182 #endif
    183     // CID valid
    184     return 1;
    185 }
    186 
    187 static int vmdk_snapshot_create(const char *filename, const char *backing_file)
    188 {
    189     int snp_fd, p_fd;
    190     uint32_t p_cid;
    191     char *p_name, *gd_buf, *rgd_buf;
    192     const char *real_filename, *temp_str;
    193     VMDK4Header header;
    194     uint32_t gde_entries, gd_size;
    195     int64_t gd_offset, rgd_offset, capacity, gt_size;
    196     char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
    197     static const char desc_template[] =
    198     "# Disk DescriptorFile\n"
    199     "version=1\n"
    200     "CID=%x\n"
    201     "parentCID=%x\n"
    202     "createType=\"monolithicSparse\"\n"
    203     "parentFileNameHint=\"%s\"\n"
    204     "\n"
    205     "# Extent description\n"
    206     "RW %u SPARSE \"%s\"\n"
    207     "\n"
    208     "# The Disk Data Base \n"
    209     "#DDB\n"
    210     "\n";
    211 
    212     snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
    213     if (snp_fd < 0)
    214         return -1;
    215     p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
    216     if (p_fd < 0) {
    217         close(snp_fd);
    218         return -1;
    219     }
    220 
    221     /* read the header */
    222     if (lseek(p_fd, 0x0, SEEK_SET) == -1)
    223         goto fail;
    224     if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
    225         goto fail;
    226 
    227     /* write the header */
    228     if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
    229         goto fail;
    230     if (write(snp_fd, hdr, HEADER_SIZE) == -1)
    231         goto fail;
    232 
    233     memset(&header, 0, sizeof(header));
    234     memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
    235 
    236     ftruncate(snp_fd, header.grain_offset << 9);
    237     /* the descriptor offset = 0x200 */
    238     if (lseek(p_fd, 0x200, SEEK_SET) == -1)
    239         goto fail;
    240     if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
    241         goto fail;
    242 
    243     if ((p_name = strstr(p_desc,"CID")) != NULL) {
    244         p_name += sizeof("CID");
    245         sscanf(p_name,"%x",&p_cid);
    246     }
    247 
    248     real_filename = filename;
    249     if ((temp_str = strrchr(real_filename, '\\')) != NULL)
    250         real_filename = temp_str + 1;
    251     if ((temp_str = strrchr(real_filename, '/')) != NULL)
    252         real_filename = temp_str + 1;
    253     if ((temp_str = strrchr(real_filename, ':')) != NULL)
    254         real_filename = temp_str + 1;
    255 
    256     snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
    257              (uint32_t)header.capacity, real_filename);
    258 
    259     /* write the descriptor */
    260     if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
    261         goto fail;
    262     if (write(snp_fd, s_desc, strlen(s_desc)) == -1)
    263         goto fail;
    264 
    265     gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
    266     rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
    267     capacity = header.capacity * SECTOR_SIZE;       // Extent size
    268     /*
    269      * Each GDE span 32M disk, means:
    270      * 512 GTE per GT, each GTE points to grain
    271      */
    272     gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
    273     if (!gt_size)
    274         goto fail;
    275     gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
    276     gd_size = gde_entries * sizeof(uint32_t);
    277 
    278     /* write RGD */
    279     rgd_buf = qemu_malloc(gd_size);
    280     if (lseek(p_fd, rgd_offset, SEEK_SET) == -1)
    281         goto fail_rgd;
    282     if (read(p_fd, rgd_buf, gd_size) != gd_size)
    283         goto fail_rgd;
    284     if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1)
    285         goto fail_rgd;
    286     if (write(snp_fd, rgd_buf, gd_size) == -1)
    287         goto fail_rgd;
    288     qemu_free(rgd_buf);
    289 
    290     /* write GD */
    291     gd_buf = qemu_malloc(gd_size);
    292     if (lseek(p_fd, gd_offset, SEEK_SET) == -1)
    293         goto fail_gd;
    294     if (read(p_fd, gd_buf, gd_size) != gd_size)
    295         goto fail_gd;
    296     if (lseek(snp_fd, gd_offset, SEEK_SET) == -1)
    297         goto fail_gd;
    298     if (write(snp_fd, gd_buf, gd_size) == -1)
    299         goto fail_gd;
    300     qemu_free(gd_buf);
    301 
    302     close(p_fd);
    303     close(snp_fd);
    304     return 0;
    305 
    306     fail_gd:
    307     qemu_free(gd_buf);
    308     fail_rgd:
    309     qemu_free(rgd_buf);
    310     fail:
    311     close(p_fd);
    312     close(snp_fd);
    313     return -1;
    314 }
    315 
    316 static void vmdk_parent_close(BlockDriverState *bs)
    317 {
    318     if (bs->backing_hd)
    319         bdrv_close(bs->backing_hd);
    320 }
    321 
    322 static int parent_open = 0;
    323 static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
    324 {
    325     BDRVVmdkState *s = bs->opaque;
    326     char *p_name;
    327     char desc[DESC_SIZE];
    328     char parent_img_name[1024];
    329 
    330     /* the descriptor offset = 0x200 */
    331     if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
    332         return -1;
    333 
    334     if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
    335         char *end_name;
    336         struct stat file_buf;
    337 
    338         p_name += sizeof("parentFileNameHint") + 1;
    339         if ((end_name = strchr(p_name,'\"')) == NULL)
    340             return -1;
    341         if ((end_name - p_name) > sizeof (s->hd->backing_file) - 1)
    342             return -1;
    343 
    344         pstrcpy(s->hd->backing_file, end_name - p_name + 1, p_name);
    345         if (stat(s->hd->backing_file, &file_buf) != 0) {
    346             path_combine(parent_img_name, sizeof(parent_img_name),
    347                          filename, s->hd->backing_file);
    348         } else {
    349             pstrcpy(parent_img_name, sizeof(parent_img_name),
    350                     s->hd->backing_file);
    351         }
    352 
    353         s->hd->backing_hd = bdrv_new("");
    354         if (!s->hd->backing_hd) {
    355             failure:
    356             bdrv_close(s->hd);
    357             return -1;
    358         }
    359         parent_open = 1;
    360         if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
    361             goto failure;
    362         parent_open = 0;
    363     }
    364 
    365     return 0;
    366 }
    367 
    368 static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
    369 {
    370     BDRVVmdkState *s = bs->opaque;
    371     uint32_t magic;
    372     int l1_size, i, ret;
    373 
    374     if (parent_open)
    375         // Parent must be opened as RO.
    376         flags = BDRV_O_RDONLY;
    377 
    378     ret = bdrv_file_open(&s->hd, filename, flags);
    379     if (ret < 0)
    380         return ret;
    381     if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
    382         goto fail;
    383 
    384     magic = be32_to_cpu(magic);
    385     if (magic == VMDK3_MAGIC) {
    386         VMDK3Header header;
    387 
    388         if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
    389             goto fail;
    390         s->cluster_sectors = le32_to_cpu(header.granularity);
    391         s->l2_size = 1 << 9;
    392         s->l1_size = 1 << 6;
    393         bs->total_sectors = le32_to_cpu(header.disk_sectors);
    394         s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
    395         s->l1_backup_table_offset = 0;
    396         s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
    397     } else if (magic == VMDK4_MAGIC) {
    398         VMDK4Header header;
    399 
    400         if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
    401             goto fail;
    402         bs->total_sectors = le64_to_cpu(header.capacity);
    403         s->cluster_sectors = le64_to_cpu(header.granularity);
    404         s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
    405         s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
    406         if (s->l1_entry_sectors <= 0)
    407             goto fail;
    408         s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
    409             / s->l1_entry_sectors;
    410         s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
    411         s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
    412 
    413         if (parent_open)
    414             s->is_parent = 1;
    415         else
    416             s->is_parent = 0;
    417 
    418         // try to open parent images, if exist
    419         if (vmdk_parent_open(bs, filename) != 0)
    420             goto fail;
    421         // write the CID once after the image creation
    422         s->parent_cid = vmdk_read_cid(bs,1);
    423     } else {
    424         goto fail;
    425     }
    426 
    427     /* read the L1 table */
    428     l1_size = s->l1_size * sizeof(uint32_t);
    429     s->l1_table = qemu_malloc(l1_size);
    430     if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
    431         goto fail;
    432     for(i = 0; i < s->l1_size; i++) {
    433         le32_to_cpus(&s->l1_table[i]);
    434     }
    435 
    436     if (s->l1_backup_table_offset) {
    437         s->l1_backup_table = qemu_malloc(l1_size);
    438         if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
    439             goto fail;
    440         for(i = 0; i < s->l1_size; i++) {
    441             le32_to_cpus(&s->l1_backup_table[i]);
    442         }
    443     }
    444 
    445     s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    446     return 0;
    447  fail:
    448     qemu_free(s->l1_backup_table);
    449     qemu_free(s->l1_table);
    450     qemu_free(s->l2_cache);
    451     bdrv_delete(s->hd);
    452     return -1;
    453 }
    454 
    455 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
    456                                    uint64_t offset, int allocate);
    457 
    458 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
    459                              uint64_t offset, int allocate)
    460 {
    461     uint64_t parent_cluster_offset;
    462     BDRVVmdkState *s = bs->opaque;
    463     uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
    464 
    465     // we will be here if it's first write on non-exist grain(cluster).
    466     // try to read from parent image, if exist
    467     if (s->hd->backing_hd) {
    468         BDRVVmdkState *ps = s->hd->backing_hd->opaque;
    469 
    470         if (!vmdk_is_cid_valid(bs))
    471             return -1;
    472 
    473         parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
    474 
    475         if (parent_cluster_offset) {
    476             BDRVVmdkState *act_s = activeBDRV.hd->opaque;
    477 
    478             if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
    479                 return -1;
    480 
    481             //Write grain only into the active image
    482             if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
    483                 return -1;
    484         }
    485     }
    486     return 0;
    487 }
    488 
    489 static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
    490 {
    491     BDRVVmdkState *s = bs->opaque;
    492 
    493     /* update L2 table */
    494     if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
    495                     &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
    496         return -1;
    497     /* update backup L2 table */
    498     if (s->l1_backup_table_offset != 0) {
    499         m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
    500         if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
    501                         &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
    502             return -1;
    503     }
    504 
    505     return 0;
    506 }
    507 
/*
 * Map a guest byte offset to the byte offset of its cluster in the image
 * file, going through the L1 table and a cached copy of the L2 table.
 *
 * bs        image to look the offset up in
 * m_data    may be NULL; its valid flag is cleared on entry and the structure
 *           is filled in (valid = 1) when the unmapped-cluster path below
 *           completes
 * offset    guest offset in bytes
 * allocate  non-zero to allocate a cluster when the offset is unmapped
 *
 * Returns the cluster's byte offset, or 0 when the offset is outside the L1
 * table, its L1 or L2 entry is empty and allocate is zero, the L2 table
 * cannot be read, or get_whole_cluster() fails.  For an image flagged
 * is_parent nothing is allocated, so the result stays 0 even when allocate
 * is set.
 */
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
                                   uint64_t offset, int allocate)
{
    BDRVVmdkState *s = bs->opaque;
    unsigned int l1_index, l2_offset, l2_index;
    int min_index, i, j;
    uint32_t min_count, *l2_table, tmp = 0;
    uint64_t cluster_offset;

    if (m_data)
        m_data->valid = 0;

    /* offset >> 9 converts the byte offset to a sector number */
    l1_index = (offset >> 9) / s->l1_entry_sectors;
    if (l1_index >= s->l1_size)
        return 0;
    l2_offset = s->l1_table[l1_index];
    if (!l2_offset)
        return 0;
    /* look for the L2 table in the cache, keyed by its sector offset */
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == s->l2_cache_offsets[i]) {
            /* increment the hit count; halve every counter once one of them
             * reaches the maximum value */
            if (++s->l2_cache_counts[i] == 0xffffffff) {
                for(j = 0; j < L2_CACHE_SIZE; j++) {
                    s->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = s->l2_cache + (i * s->l2_size);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (s->l2_cache_counts[i] < min_count) {
            min_count = s->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = s->l2_cache + (min_index * s->l2_size);
    if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
                                                                        s->l2_size * sizeof(uint32_t))
        return 0;

    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;
 found:
    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
    /* cached L2 entries are kept in their on-disk (little-endian) form */
    cluster_offset = le32_to_cpu(l2_table[l2_index]);

    if (!cluster_offset) {
        if (!allocate)
            return 0;
        // Avoid the L2 tables update for the images that have snapshots.
        if (!s->is_parent) {
            /* the new cluster is appended at the current end of the file */
            cluster_offset = bdrv_getlength(s->hd);
            bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));

            cluster_offset >>= 9;
            tmp = cpu_to_le32(cluster_offset);
            /* only the cached table is updated here; the on-disk entry is
             * described through m_data for vmdk_L2update() */
            l2_table[l2_index] = tmp;
            // Save the active image state
            activeBDRV.cluster_offset = cluster_offset;
            activeBDRV.hd = bs;
        }
        /* First of all we write the grain itself, to avoid a race condition
         * that may corrupt the image.
         * This problem may occur because of insufficient space on host disk
         * or inappropriate VM shutdown.
         */
        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
            return 0;

        if (m_data) {
            m_data->offset = tmp;
            m_data->l1_index = l1_index;
            m_data->l2_index = l2_index;
            m_data->l2_offset = l2_offset;
            m_data->valid = 1;
        }
    }
    /* sectors -> bytes */
    cluster_offset <<= 9;
    return cluster_offset;
}
    592 
    593 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
    594                              int nb_sectors, int *pnum)
    595 {
    596     BDRVVmdkState *s = bs->opaque;
    597     int index_in_cluster, n;
    598     uint64_t cluster_offset;
    599 
    600     cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
    601     index_in_cluster = sector_num % s->cluster_sectors;
    602     n = s->cluster_sectors - index_in_cluster;
    603     if (n > nb_sectors)
    604         n = nb_sectors;
    605     *pnum = n;
    606     return (cluster_offset != 0);
    607 }
    608 
    609 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
    610                     uint8_t *buf, int nb_sectors)
    611 {
    612     BDRVVmdkState *s = bs->opaque;
    613     int index_in_cluster, n, ret;
    614     uint64_t cluster_offset;
    615 
    616     while (nb_sectors > 0) {
    617         cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
    618         index_in_cluster = sector_num % s->cluster_sectors;
    619         n = s->cluster_sectors - index_in_cluster;
    620         if (n > nb_sectors)
    621             n = nb_sectors;
    622         if (!cluster_offset) {
    623             // try to read from parent image, if exist
    624             if (s->hd->backing_hd) {
    625                 if (!vmdk_is_cid_valid(bs))
    626                     return -1;
    627                 ret = bdrv_read(s->hd->backing_hd, sector_num, buf, n);
    628                 if (ret < 0)
    629                     return -1;
    630             } else {
    631                 memset(buf, 0, 512 * n);
    632             }
    633         } else {
    634             if(bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
    635                 return -1;
    636         }
    637         nb_sectors -= n;
    638         sector_num += n;
    639         buf += n * 512;
    640     }
    641     return 0;
    642 }
    643 
    644 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
    645                      const uint8_t *buf, int nb_sectors)
    646 {
    647     BDRVVmdkState *s = bs->opaque;
    648     VmdkMetaData m_data;
    649     int index_in_cluster, n;
    650     uint64_t cluster_offset;
    651     static int cid_update = 0;
    652 
    653     if (sector_num > bs->total_sectors) {
    654         fprintf(stderr,
    655                 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
    656                 " total_sectors=0x%" PRIx64 "\n",
    657                 sector_num, bs->total_sectors);
    658         return -1;
    659     }
    660 
    661     while (nb_sectors > 0) {
    662         index_in_cluster = sector_num & (s->cluster_sectors - 1);
    663         n = s->cluster_sectors - index_in_cluster;
    664         if (n > nb_sectors)
    665             n = nb_sectors;
    666         cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
    667         if (!cluster_offset)
    668             return -1;
    669 
    670         if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
    671             return -1;
    672         if (m_data.valid) {
    673             /* update L2 tables */
    674             if (vmdk_L2update(bs, &m_data) == -1)
    675                 return -1;
    676         }
    677         nb_sectors -= n;
    678         sector_num += n;
    679         buf += n * 512;
    680 
    681         // update CID on the first write every time the virtual disk is opened
    682         if (!cid_update) {
    683             vmdk_write_cid(bs, time(NULL));
    684             cid_update++;
    685         }
    686     }
    687     return 0;
    688 }
    689 
    690 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
    691 {
    692     int fd, i;
    693     VMDK4Header header;
    694     uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
    695     static const char desc_template[] =
    696         "# Disk DescriptorFile\n"
    697         "version=1\n"
    698         "CID=%x\n"
    699         "parentCID=ffffffff\n"
    700         "createType=\"monolithicSparse\"\n"
    701         "\n"
    702         "# Extent description\n"
    703         "RW %" PRId64 " SPARSE \"%s\"\n"
    704         "\n"
    705         "# The Disk Data Base \n"
    706         "#DDB\n"
    707         "\n"
    708         "ddb.virtualHWVersion = \"%d\"\n"
    709         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
    710         "ddb.geometry.heads = \"16\"\n"
    711         "ddb.geometry.sectors = \"63\"\n"
    712         "ddb.adapterType = \"ide\"\n";
    713     char desc[1024];
    714     const char *real_filename, *temp_str;
    715     int64_t total_size = 0;
    716     const char *backing_file = NULL;
    717     int flags = 0;
    718 
    719     // Read out options
    720     while (options && options->name) {
    721         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
    722             total_size = options->value.n / 512;
    723         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
    724             backing_file = options->value.s;
    725         } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
    726             flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
    727         }
    728         options++;
    729     }
    730 
    731     /* XXX: add support for backing file */
    732     if (backing_file) {
    733         return vmdk_snapshot_create(filename, backing_file);
    734     }
    735 
    736     fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
    737               0644);
    738     if (fd < 0)
    739         return -1;
    740     magic = cpu_to_be32(VMDK4_MAGIC);
    741     memset(&header, 0, sizeof(header));
    742     header.version = cpu_to_le32(1);
    743     header.flags = cpu_to_le32(3); /* ?? */
    744     header.capacity = cpu_to_le64(total_size);
    745     header.granularity = cpu_to_le64(128);
    746     header.num_gtes_per_gte = cpu_to_le32(512);
    747 
    748     grains = (total_size + header.granularity - 1) / header.granularity;
    749     gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
    750     gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
    751     gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
    752 
    753     header.desc_offset = 1;
    754     header.desc_size = 20;
    755     header.rgd_offset = header.desc_offset + header.desc_size;
    756     header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
    757     header.grain_offset =
    758        ((header.gd_offset + gd_size + (gt_size * gt_count) +
    759          header.granularity - 1) / header.granularity) *
    760         header.granularity;
    761 
    762     header.desc_offset = cpu_to_le64(header.desc_offset);
    763     header.desc_size = cpu_to_le64(header.desc_size);
    764     header.rgd_offset = cpu_to_le64(header.rgd_offset);
    765     header.gd_offset = cpu_to_le64(header.gd_offset);
    766     header.grain_offset = cpu_to_le64(header.grain_offset);
    767 
    768     header.check_bytes[0] = 0xa;
    769     header.check_bytes[1] = 0x20;
    770     header.check_bytes[2] = 0xd;
    771     header.check_bytes[3] = 0xa;
    772 
    773     /* write all the data */
    774     write(fd, &magic, sizeof(magic));
    775     write(fd, &header, sizeof(header));
    776 
    777     ftruncate(fd, header.grain_offset << 9);
    778 
    779     /* write grain directory */
    780     lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
    781     for (i = 0, tmp = header.rgd_offset + gd_size;
    782          i < gt_count; i++, tmp += gt_size)
    783         write(fd, &tmp, sizeof(tmp));
    784 
    785     /* write backup grain directory */
    786     lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
    787     for (i = 0, tmp = header.gd_offset + gd_size;
    788          i < gt_count; i++, tmp += gt_size)
    789         write(fd, &tmp, sizeof(tmp));
    790 
    791     /* compose the descriptor */
    792     real_filename = filename;
    793     if ((temp_str = strrchr(real_filename, '\\')) != NULL)
    794         real_filename = temp_str + 1;
    795     if ((temp_str = strrchr(real_filename, '/')) != NULL)
    796         real_filename = temp_str + 1;
    797     if ((temp_str = strrchr(real_filename, ':')) != NULL)
    798         real_filename = temp_str + 1;
    799     snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
    800              total_size, real_filename,
    801              (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
    802              total_size / (int64_t)(63 * 16));
    803 
    804     /* write the descriptor */
    805     lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
    806     write(fd, desc, strlen(desc));
    807 
    808     close(fd);
    809     return 0;
    810 }
    811 
    812 static void vmdk_close(BlockDriverState *bs)
    813 {
    814     BDRVVmdkState *s = bs->opaque;
    815 
    816     qemu_free(s->l1_table);
    817     qemu_free(s->l2_cache);
    818     // try to close parent image, if exist
    819     vmdk_parent_close(s->hd);
    820     bdrv_delete(s->hd);
    821 }
    822 
    823 static void vmdk_flush(BlockDriverState *bs)
    824 {
    825     BDRVVmdkState *s = bs->opaque;
    826     bdrv_flush(s->hd);
    827 }
    828 
    829 
    830 static QEMUOptionParameter vmdk_create_options[] = {
    831     {
    832         .name = BLOCK_OPT_SIZE,
    833         .type = OPT_SIZE,
    834         .help = "Virtual disk size"
    835     },
    836     {
    837         .name = BLOCK_OPT_BACKING_FILE,
    838         .type = OPT_STRING,
    839         .help = "File name of a base image"
    840     },
    841     {
    842         .name = BLOCK_OPT_COMPAT6,
    843         .type = OPT_FLAG,
    844         .help = "VMDK version 6 image"
    845     },
    846     { NULL }
    847 };
    848 
    849 static BlockDriver bdrv_vmdk = {
    850     .format_name	= "vmdk",
    851     .instance_size	= sizeof(BDRVVmdkState),
    852     .bdrv_probe		= vmdk_probe,
    853     .bdrv_open		= vmdk_open,
    854     .bdrv_read		= vmdk_read,
    855     .bdrv_write		= vmdk_write,
    856     .bdrv_close		= vmdk_close,
    857     .bdrv_create	= vmdk_create,
    858     .bdrv_flush		= vmdk_flush,
    859     .bdrv_is_allocated	= vmdk_is_allocated,
    860 
    861     .create_options = vmdk_create_options,
    862 };
    863 
    864 static void bdrv_vmdk_init(void)
    865 {
    866     bdrv_register(&bdrv_vmdk);
    867 }
    868 
    869 block_init(bdrv_vmdk_init);
    870