KVM: x86: TSC reset compensation
author Zachary Amsden <zamsden@redhat.com>
Fri, 20 Aug 2010 08:07:20 +0000 (22:07 -1000)
committer Avi Kivity <avi@redhat.com>
Sun, 24 Oct 2010 08:51:22 +0000 (10:51 +0200)
Attempt to synchronize TSCs that are reset to the same value.  In the
case of a reliable hardware TSC, we can just reuse the same offset, but
on non-reliable hardware, we can get closer by adjusting the offset to
match the elapsed time.
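
For example, on unstable hardware the patch advances the offset by the
elapsed wall-clock time converted into guest TSC cycles:

    tsc_delta = elapsed_ns * tsc_khz / USEC_PER_SEC

Since tsc_khz counts cycles per millisecond and USEC_PER_SEC (10^6) is
also the number of nanoseconds per millisecond, the quotient is in
cycles; 1.5 s of elapsed time on a 2 GHz TSC adds 3,000,000,000 cycles
to the offset.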

Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c

index a215153f1ff637b1fe9e7af222d7bcceba7a26f1..57b4394491ec1488b4ae49008eb5c61aa6079164 100644
@@ -396,6 +396,9 @@ struct kvm_arch {
        unsigned long irq_sources_bitmap;
        s64 kvmclock_offset;
        spinlock_t tsc_write_lock;
+       u64 last_tsc_nsec;
+       u64 last_tsc_offset;
+       u64 last_tsc_write;
 
        struct kvm_xen_hvm_config xen_hvm_config;
 
index 886132b6ef14064e901f02a0f74d533aed24c684..e7da14c317e6571930cec64e8e6af4853d820913 100644
@@ -898,11 +898,40 @@ static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
        struct kvm *kvm = vcpu->kvm;
-       u64 offset;
+       u64 offset, ns, elapsed;
        unsigned long flags;
+       struct timespec ts;
 
        spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
        offset = data - native_read_tsc();
+       ktime_get_ts(&ts);
+       monotonic_to_bootbased(&ts);
+       ns = timespec_to_ns(&ts);
+       elapsed = ns - kvm->arch.last_tsc_nsec;
+
+       /*
+        * Special case: identical write to TSC within 5 seconds of
+        * another CPU is interpreted as an attempt to synchronize
+        * (the 5 seconds is to accommodate host load / swapping).
+        *
+        * In that case, for a reliable TSC, we can match TSC offsets,
+        * or make a best guess using the kernel ns value.
+        */
+       if (data == kvm->arch.last_tsc_write && elapsed < 5ULL * NSEC_PER_SEC) {
+               if (!check_tsc_unstable()) {
+                       offset = kvm->arch.last_tsc_offset;
+                       pr_debug("kvm: matched tsc offset for %llu\n", data);
+               } else {
+                       u64 tsc_delta = elapsed * __get_cpu_var(cpu_tsc_khz);
+                       tsc_delta = tsc_delta / USEC_PER_SEC;
+                       offset += tsc_delta;
+                       pr_debug("kvm: adjusted tsc offset by %llu\n", tsc_delta);
+               }
+               ns = kvm->arch.last_tsc_nsec;
+       }
+       kvm->arch.last_tsc_nsec = ns;
+       kvm->arch.last_tsc_write = data;
+       kvm->arch.last_tsc_offset = offset;
        kvm_x86_ops->write_tsc_offset(vcpu, offset);
        spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
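
As a standalone illustration of that conversion, here is a minimal
userspace sketch (the tsc_khz and elapsed values are made-up example
inputs, not taken from the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define USEC_PER_SEC 1000000ULL
    #define NSEC_PER_SEC 1000000000ULL

    /* Elapsed nanoseconds -> TSC cycles at tsc_khz, mirroring the
     * tsc_delta computation in kvm_write_tsc() above. */
    static uint64_t ns_to_tsc_cycles(uint64_t ns, uint64_t tsc_khz)
    {
            return ns * tsc_khz / USEC_PER_SEC;
    }

    int main(void)
    {
            uint64_t tsc_khz = 2000000;              /* hypothetical 2 GHz TSC */
            uint64_t elapsed = 3 * NSEC_PER_SEC / 2; /* 1.5 s between writes */

            /* Prints: tsc_delta = 3000000000 cycles */
            printf("tsc_delta = %llu cycles\n",
                   (unsigned long long)ns_to_tsc_cycles(elapsed, tsc_khz));
            return 0;
    }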