They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+The bits in the dirty bitmap are cleared before the ioctl returns, unless
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
+see the description of the capability.
4.9 KVM_SET_MEMORY_ALIAS
between coalesced mmio and pio except that coalesced pio records accesses
to I/O ports.
+4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
+
+Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_dirty_log (in)
+Returns: 0 on success, -1 on error
+
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+ __u32 slot;
+ __u32 num_pages;
+ __u64 first_page;
+ union {
+ void __user *dirty_bitmap; /* one bit per page */
+ __u64 padding;
+ };
+};
+
+The ioctl clears the dirty status of pages in a memory slot, according to
+the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
+field. Bit 0 of the bitmap corresponds to page "first_page" in the
+memory slot, and num_pages is the size in bits of the input bitmap.
+Both first_page and num_pages must be a multiple of 64. For each bit
+that is set in the input bitmap, the corresponding page is marked "clean"
+in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
+(for example via write-protection, or by clearing the dirty bit in
+a page table entry).
+
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
+the address space for which you want to return the dirty bitmap.
+They must be less than the value that KVM_CHECK_EXTENSION returns for
+the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+
+This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+is enabled; for more information, see the description of the capability.
+However, it can always be used as long as KVM_CHECK_EXTENSION confirms
+that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
+
+
5. The kvm_run structure
------------------------
* For the new DR6 bits, note that bit 16 is set iff the #DB exception
will clear DR6.RTM.
+7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+
+Architectures: all
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
+clear and write-protect all pages that are returned as dirty.
+Rather, userspace will have to do this operation separately using
+KVM_CLEAR_DIRTY_LOG.
+
+At the cost of a slightly more complicated operation, this provides better
+scalability and responsiveness for two reasons. First,
+KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather
+than requiring to sync a full memslot; this ensures that KVM does not
+take spinlocks for an extended period of time. Second, in some cases a
+large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
+userspace actually using the data in the page. Pages can be modified
+during this time, which is inefficint for both the guest and userspace:
+the guest will incur a higher penalty due to write protection faults,
+while userspace can see false reports of dirty pages. Manual reprotection
+helps reducing this time, improving guest performance and reducing the
+number of dirty log false positives.
+
+
8. Other capabilities.
----------------------
return r;
}
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ bool flush = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+ if (flush) {
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
+
+ /* Let implementation handle TLB/GVA invalidation */
+ kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
+ }
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
long r;
return r;
}
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+ bool flush = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ /*
+ * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+ */
+ if (kvm_x86_ops->flush_log_dirty)
+ kvm_x86_ops->flush_log_dirty(kvm);
+
+ r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+ /*
+ * All the TLBs can be flushed out of mmu lock, see the comments in
+ * kvm_mmu_slot_remove_write_access().
+ */
+ lockdep_assert_held(&kvm->slots_lock);
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
#endif
long tlbs_dirty;
struct list_head devices;
+ bool manual_dirty_log_protect;
struct dentry *debugfs_dentry;
struct kvm_stat_data **debugfs_stat_data;
struct srcu_struct srcu;
int kvm_get_dirty_log_protect(struct kvm *kvm,
struct kvm_dirty_log *log, bool *flush);
+int kvm_clear_dirty_log_protect(struct kvm *kvm,
+ struct kvm_clear_dirty_log *log, bool *flush);
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log);
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
+ struct kvm_clear_dirty_log *log);
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status);
};
};
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+ __u32 slot;
+ __u32 num_pages;
+ __u64 first_page;
+ union {
+ void __user *dirty_bitmap; /* one bit per page */
+ __u64 padding2;
+ };
+};
+
/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
__u32 len;
#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
#define KVM_CAP_EXCEPTION_PAYLOAD 164
#define KVM_CAP_ARM_VM_IPA_SIZE 165
+#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
#ifdef KVM_CAP_IRQ_ROUTING
#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
+/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
+#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
--- /dev/null
+#define USE_CLEAR_DIRTY_LOG
+#include "dirty_log_test.c"
vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
+#ifdef USE_CLEAR_DIRTY_LOG
+ struct kvm_enable_cap cap = {};
+
+ cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
+ cap.args[0] = 1;
+ vm_enable_cap(vm, &cap);
+#endif
+
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
guest_test_mem,
/* Give the vcpu thread some time to dirty some pages */
usleep(interval * 1000);
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+#ifdef USE_CLEAR_DIRTY_LOG
+ kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+ DIV_ROUND_UP(host_num_pages, 64) * 64);
+#endif
vm_dirty_log_verify(bmap);
iteration++;
sync_global_to_guest(vm, iteration);
unsigned int mode;
int opt, i;
+#ifdef USE_CLEAR_DIRTY_LOG
+ if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) {
+ fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
+ exit(KSFT_SKIP);
+ }
+#endif
+
while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
switch (opt) {
case 'i':
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages);
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);
strerror(-ret));
}
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages)
+{
+ struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages };
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
+ TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
+ strerror(-ret));
+}
+
/*
* Userspace Memory Region Find
*
return r;
}
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+ bool flush = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
/**
* kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
- * are dirty write protect them for next write.
+ * and reenable dirty page tracking for the corresponding pages.
* @kvm: pointer to kvm instance
* @log: slot id and address to which we copy the log
* @is_dirty: flag set if any page is dirty
return -ENOENT;
n = kvm_dirty_bitmap_bytes(memslot);
+ *flush = false;
+ if (kvm->manual_dirty_log_protect) {
+ /*
+ * Unlike kvm_get_dirty_log, we always return false in *flush,
+ * because no flush is needed until KVM_CLEAR_DIRTY_LOG. There
+ * is some code duplication between this function and
+ * kvm_get_dirty_log, but hopefully all architecture
+ * transition to kvm_get_dirty_log_protect and kvm_get_dirty_log
+ * can be eliminated.
+ */
+ dirty_bitmap_buffer = dirty_bitmap;
+ } else {
+ dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
+ memset(dirty_bitmap_buffer, 0, n);
- dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
- memset(dirty_bitmap_buffer, 0, n);
+ spin_lock(&kvm->mmu_lock);
+ for (i = 0; i < n / sizeof(long); i++) {
+ unsigned long mask;
+ gfn_t offset;
- spin_lock(&kvm->mmu_lock);
+ if (!dirty_bitmap[i])
+ continue;
+
+ *flush = true;
+ mask = xchg(&dirty_bitmap[i], 0);
+ dirty_bitmap_buffer[i] = mask;
+
+ if (mask) {
+ offset = i * BITS_PER_LONG;
+ kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+ offset, mask);
+ }
+ }
+ spin_unlock(&kvm->mmu_lock);
+ }
+
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+ return -EFAULT;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+
+/**
+ * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
+ * and reenable dirty page tracking for the corresponding pages.
+ * @kvm: pointer to kvm instance
+ * @log: slot id and address from which to fetch the bitmap of dirty pages
+ */
+int kvm_clear_dirty_log_protect(struct kvm *kvm,
+ struct kvm_clear_dirty_log *log, bool *flush)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int as_id, id, n;
+ gfn_t offset;
+ unsigned long i;
+ unsigned long *dirty_bitmap;
+ unsigned long *dirty_bitmap_buffer;
+
+ as_id = log->slot >> 16;
+ id = (u16)log->slot;
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+ return -EINVAL;
+
+ if ((log->first_page & 63) || (log->num_pages & 63))
+ return -EINVAL;
+
+ slots = __kvm_memslots(kvm, as_id);
+ memslot = id_to_memslot(slots, id);
+
+ dirty_bitmap = memslot->dirty_bitmap;
+ if (!dirty_bitmap)
+ return -ENOENT;
+
+ n = kvm_dirty_bitmap_bytes(memslot);
*flush = false;
- for (i = 0; i < n / sizeof(long); i++) {
- unsigned long mask;
- gfn_t offset;
+ dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
+ if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
+ return -EFAULT;
- if (!dirty_bitmap[i])
+ spin_lock(&kvm->mmu_lock);
+ for (offset = log->first_page,
+ i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--;
+ i++, offset += BITS_PER_LONG) {
+ unsigned long mask = *dirty_bitmap_buffer++;
+ atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
+ if (!mask)
continue;
- *flush = true;
-
- mask = xchg(&dirty_bitmap[i], 0);
- dirty_bitmap_buffer[i] = mask;
+ mask &= atomic_long_fetch_andnot(mask, p);
+ /*
+ * mask contains the bits that really have been cleared. This
+ * never includes any bits beyond the length of the memslot (if
+ * the length is not aligned to 64 pages), therefore it is not
+ * a problem if userspace sets them in log->dirty_bitmap.
+ */
if (mask) {
- offset = i * BITS_PER_LONG;
+ *flush = true;
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
offset, mask);
}
}
-
spin_unlock(&kvm->mmu_lock);
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
- return -EFAULT;
+
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);
#endif
bool kvm_largepages_enabled(void)
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
case KVM_CAP_ENABLE_CAP_VM:
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+#endif
return 1;
#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
struct kvm_enable_cap *cap)
{
switch (cap->cap) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+ if (cap->flags || (cap->args[0] & ~1))
+ return -EINVAL;
+ kvm->manual_dirty_log_protect = cap->args[0];
+ return 0;
+#endif
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
}
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
}
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CLEAR_DIRTY_LOG: {
+ struct kvm_clear_dirty_log log;
+
+ r = -EFAULT;
+ if (copy_from_user(&log, argp, sizeof(log)))
+ goto out;
+ r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+ break;
+ }
+#endif
#ifdef CONFIG_KVM_MMIO
case KVM_REGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;