arch/arm64/kvm/arch_timer.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) 2012 ARM Ltd.
   4  * Author: Marc Zyngier <marc.zyngier@arm.com>
   5  */
   6
   7 #include <linux/cpu.h>
   8 #include <linux/kvm.h>
   9 #include <linux/kvm_host.h>
  10 #include <linux/interrupt.h>
  11 #include <linux/irq.h>
  12 #include <linux/irqdomain.h>
  13 #include <linux/uaccess.h>
  14
  15 #include <clocksource/arm_arch_timer.h>
  16 #include <asm/arch_timer.h>
  17 #include <asm/kvm_emulate.h>
  18 #include <asm/kvm_hyp.h>
  19 #include <asm/kvm_nested.h>
  20
  21 #include <kvm/arm_vgic.h>
  22 #include <kvm/arm_arch_timer.h>
  23
  24 #include "trace.h"
  25
/*
 * File-scope state shared by the timer code below.
 *
 * NOTE(review): none of these are assigned in the part of the file visible
 * here; presumably they are set up by init code elsewhere - confirm.
 */

/* Counter source: read by kvm_phys_timer_read() and used for cycles->ns. */
static struct timecounter *timecounter;
/* Host IRQ number compared against in kvm_arch_timer_handler(). */
static unsigned int host_vtimer_irq;
/* Presumably the host IRQ number of the physical timer - TODO confirm. */
static unsigned int host_ptimer_irq;
/* Flags passed to enable_percpu_irq() for host_vtimer_irq. */
static u32 host_vtimer_irq_flags;
/* Presumably the matching flags for host_ptimer_irq - TODO confirm. */
static u32 host_ptimer_irq_flags;

/*
 * Static key, false by default. kvm_timer_vcpu_load() takes the "gic" load
 * path when it is enabled and the "nogic" path otherwise.
 */
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
  33
/*
 * Default interrupt number for each timer, indexed by the TIMER_* constants.
 * NOTE(review): judging by the name these are PPI numbers; the table is not
 * referenced in the visible part of the file - confirm against its users.
 */
static const u8 default_ppi[] = {
        [TIMER_PTIMER]  = 30,
        [TIMER_VTIMER]  = 27,
        [TIMER_HPTIMER] = 26,
        [TIMER_HVTIMER] = 28,
};
  40
/*
 * Forward declarations for helpers that are used before their definition
 * further down in this file.
 */
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
                                 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
                                struct arch_timer_context *timer,
                                enum kvm_arch_timer_regs treg,
                                u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
                              struct arch_timer_context *timer,
                              enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);
  53
/*
 * IRQ callbacks handed to kvm_vgic_map_phys_irq() when a timer interrupt is
 * mapped to its host interrupt; only the input-level query is provided.
 */
static struct irq_ops arch_timer_irq_ops = {
        .get_input_level = kvm_arch_timer_get_input_level,
};
  57
  58 static bool has_cntpoff(void)
  59 {
  60         return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
  61 }
  62
  63 static int nr_timers(struct kvm_vcpu *vcpu)
  64 {
  65         if (!vcpu_has_nv(vcpu))
  66                 return NR_KVM_EL0_TIMERS;
  67
  68         return NR_KVM_TIMERS;
  69 }
  70
  71 u32 timer_get_ctl(struct arch_timer_context *ctxt)
  72 {
  73         struct kvm_vcpu *vcpu = ctxt->vcpu;
  74
  75         switch(arch_timer_ctx_index(ctxt)) {
  76         case TIMER_VTIMER:
  77                 return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
  78         case TIMER_PTIMER:
  79                 return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
  80         case TIMER_HVTIMER:
  81                 return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
  82         case TIMER_HPTIMER:
  83                 return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
  84         default:
  85                 WARN_ON(1);
  86                 return 0;
  87         }
  88 }
  89
  90 u64 timer_get_cval(struct arch_timer_context *ctxt)
  91 {
  92         struct kvm_vcpu *vcpu = ctxt->vcpu;
  93
  94         switch(arch_timer_ctx_index(ctxt)) {
  95         case TIMER_VTIMER:
  96                 return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
  97         case TIMER_PTIMER:
  98                 return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
  99         case TIMER_HVTIMER:
 100                 return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
 101         case TIMER_HPTIMER:
 102                 return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
 103         default:
 104                 WARN_ON(1);
 105                 return 0;
 106         }
 107 }
 108
 109 static u64 timer_get_offset(struct arch_timer_context *ctxt)
 110 {
 111         u64 offset = 0;
 112
 113         if (!ctxt)
 114                 return 0;
 115
 116         if (ctxt->offset.vm_offset)
 117                 offset += *ctxt->offset.vm_offset;
 118         if (ctxt->offset.vcpu_offset)
 119                 offset += *ctxt->offset.vcpu_offset;
 120
 121         return offset;
 122 }
 123
 124 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
 125 {
 126         struct kvm_vcpu *vcpu = ctxt->vcpu;
 127
 128         switch(arch_timer_ctx_index(ctxt)) {
 129         case TIMER_VTIMER:
 130                 __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
 131                 break;
 132         case TIMER_PTIMER:
 133                 __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
 134                 break;
 135         case TIMER_HVTIMER:
 136                 __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
 137                 break;
 138         case TIMER_HPTIMER:
 139                 __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
 140                 break;
 141         default:
 142                 WARN_ON(1);
 143         }
 144 }
 145
 146 static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
 147 {
 148         struct kvm_vcpu *vcpu = ctxt->vcpu;
 149
 150         switch(arch_timer_ctx_index(ctxt)) {
 151         case TIMER_VTIMER:
 152                 __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
 153                 break;
 154         case TIMER_PTIMER:
 155                 __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
 156                 break;
 157         case TIMER_HVTIMER:
 158                 __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
 159                 break;
 160         case TIMER_HPTIMER:
 161                 __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
 162                 break;
 163         default:
 164                 WARN_ON(1);
 165         }
 166 }
 167
 168 static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
 169 {
 170         if (!ctxt->offset.vm_offset) {
 171                 WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
 172                 return;
 173         }
 174
 175         WRITE_ONCE(*ctxt->offset.vm_offset, offset);
 176 }
 177
 178 u64 kvm_phys_timer_read(void)
 179 {
 180         return timecounter->cc->read(timecounter->cc);
 181 }
 182
 183 static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
 184 {
 185         if (vcpu_has_nv(vcpu)) {
 186                 if (is_hyp_ctxt(vcpu)) {
 187                         map->direct_vtimer = vcpu_hvtimer(vcpu);
 188                         map->direct_ptimer = vcpu_hptimer(vcpu);
 189                         map->emul_vtimer = vcpu_vtimer(vcpu);
 190                         map->emul_ptimer = vcpu_ptimer(vcpu);
 191                 } else {
 192                         map->direct_vtimer = vcpu_vtimer(vcpu);
 193                         map->direct_ptimer = vcpu_ptimer(vcpu);
 194                         map->emul_vtimer = vcpu_hvtimer(vcpu);
 195                         map->emul_ptimer = vcpu_hptimer(vcpu);
 196                 }
 197         } else if (has_vhe()) {
 198                 map->direct_vtimer = vcpu_vtimer(vcpu);
 199                 map->direct_ptimer = vcpu_ptimer(vcpu);
 200                 map->emul_vtimer = NULL;
 201                 map->emul_ptimer = NULL;
 202         } else {
 203                 map->direct_vtimer = vcpu_vtimer(vcpu);
 204                 map->direct_ptimer = NULL;
 205                 map->emul_vtimer = NULL;
 206                 map->emul_ptimer = vcpu_ptimer(vcpu);
 207         }
 208
 209         trace_kvm_get_timer_map(vcpu->vcpu_id, map);
 210 }
 211
 212 static inline bool userspace_irqchip(struct kvm *kvm)
 213 {
 214         return static_branch_unlikely(&userspace_irqchip_in_use) &&
 215                 unlikely(!irqchip_in_kernel(kvm));
 216 }
 217
 218 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
 219 {
 220         hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
 221                       HRTIMER_MODE_ABS_HARD);
 222 }
 223
/* Cancel @hrt; the return value of hrtimer_cancel() is not used. */
static void soft_timer_cancel(struct hrtimer *hrt)
{
        hrtimer_cancel(hrt);
}
 228
 229 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 230 {
 231         struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
 232         struct arch_timer_context *ctx;
 233         struct timer_map map;
 234
 235         /*
 236          * We may see a timer interrupt after vcpu_put() has been called which
 237          * sets the CPU's vcpu pointer to NULL, because even though the timer
 238          * has been disabled in timer_save_state(), the hardware interrupt
 239          * signal may not have been retired from the interrupt controller yet.
 240          */
 241         if (!vcpu)
 242                 return IRQ_HANDLED;
 243
 244         get_timer_map(vcpu, &map);
 245
 246         if (irq == host_vtimer_irq)
 247                 ctx = map.direct_vtimer;
 248         else
 249                 ctx = map.direct_ptimer;
 250
 251         if (kvm_timer_should_fire(ctx))
 252                 kvm_timer_update_irq(vcpu, true, ctx);
 253
 254         if (userspace_irqchip(vcpu->kvm) &&
 255             !static_branch_unlikely(&has_gic_active_state))
 256                 disable_percpu_irq(host_vtimer_irq);
 257
 258         return IRQ_HANDLED;
 259 }
 260
 261 static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
 262                                      u64 val)
 263 {
 264         u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
 265
 266         if (now < val) {
 267                 u64 ns;
 268
 269                 ns = cyclecounter_cyc2ns(timecounter->cc,
 270                                          val - now,
 271                                          timecounter->mask,
 272                                          &timer_ctx->ns_frac);
 273                 return ns;
 274         }
 275
 276         return 0;
 277 }
 278
 279 static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
 280 {
 281         return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
 282 }
 283
 284 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
 285 {
 286         WARN_ON(timer_ctx && timer_ctx->loaded);
 287         return timer_ctx &&
 288                 ((timer_get_ctl(timer_ctx) &
 289                   (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
 290 }
 291
 292 static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
 293 {
 294         return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
 295                 vcpu_get_flag(vcpu, IN_WFIT));
 296 }
 297
 298 static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
 299 {
 300         u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
 301         struct arch_timer_context *ctx;
 302
 303         ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
 304                                                        : vcpu_vtimer(vcpu);
 305
 306         return kvm_counter_compute_delta(ctx, val);
 307 }
 308
 309 /*
 310  * Returns the earliest expiration time in ns among guest timers.
 311  * Note that it will return 0 if none of timers can fire.
 312  */
 313 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
 314 {
 315         u64 min_delta = ULLONG_MAX;
 316         int i;
 317
 318         for (i = 0; i < nr_timers(vcpu); i++) {
 319                 struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
 320
 321                 WARN(ctx->loaded, "timer %d loaded\n", i);
 322                 if (kvm_timer_irq_can_fire(ctx))
 323                         min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
 324         }
 325
 326         if (vcpu_has_wfit_active(vcpu))
 327                 min_delta = min(min_delta, wfit_delay_ns(vcpu));
 328
 329         /* If none of timers can fire, then return 0 */
 330         if (min_delta == ULLONG_MAX)
 331                 return 0;
 332
 333         return min_delta;
 334 }
 335
 336 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
 337 {
 338         struct arch_timer_cpu *timer;
 339         struct kvm_vcpu *vcpu;
 340         u64 ns;
 341
 342         timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
 343         vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
 344
 345         /*
 346          * Check that the timer has really expired from the guest's
 347          * PoV (NTP on the host may have forced it to expire
 348          * early). If we should have slept longer, restart it.
 349          */
 350         ns = kvm_timer_earliest_exp(vcpu);
 351         if (unlikely(ns)) {
 352                 hrtimer_forward_now(hrt, ns_to_ktime(ns));
 353                 return HRTIMER_RESTART;
 354         }
 355
 356         kvm_vcpu_wake_up(vcpu);
 357         return HRTIMER_NORESTART;
 358 }
 359
 360 static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
 361 {
 362         struct arch_timer_context *ctx;
 363         struct kvm_vcpu *vcpu;
 364         u64 ns;
 365
 366         ctx = container_of(hrt, struct arch_timer_context, hrtimer);
 367         vcpu = ctx->vcpu;
 368
 369         trace_kvm_timer_hrtimer_expire(ctx);
 370
 371         /*
 372          * Check that the timer has really expired from the guest's
 373          * PoV (NTP on the host may have forced it to expire
 374          * early). If not ready, schedule for a later time.
 375          */
 376         ns = kvm_timer_compute_delta(ctx);
 377         if (unlikely(ns)) {
 378                 hrtimer_forward_now(hrt, ns_to_ktime(ns));
 379                 return HRTIMER_RESTART;
 380         }
 381
 382         kvm_timer_update_irq(vcpu, true, ctx);
 383         return HRTIMER_NORESTART;
 384 }
 385
 386 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 387 {
 388         enum kvm_arch_timers index;
 389         u64 cval, now;
 390
 391         if (!timer_ctx)
 392                 return false;
 393
 394         index = arch_timer_ctx_index(timer_ctx);
 395
 396         if (timer_ctx->loaded) {
 397                 u32 cnt_ctl = 0;
 398
 399                 switch (index) {
 400                 case TIMER_VTIMER:
 401                 case TIMER_HVTIMER:
 402                         cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
 403                         break;
 404                 case TIMER_PTIMER:
 405                 case TIMER_HPTIMER:
 406                         cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
 407                         break;
 408                 case NR_KVM_TIMERS:
 409                         /* GCC is braindead */
 410                         cnt_ctl = 0;
 411                         break;
 412                 }
 413
 414                 return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
 415                         (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
 416                        !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
 417         }
 418
 419         if (!kvm_timer_irq_can_fire(timer_ctx))
 420                 return false;
 421
 422         cval = timer_get_cval(timer_ctx);
 423         now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
 424
 425         return cval <= now;
 426 }
 427
 428 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 429 {
 430         return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
 431 }
 432
 433 /*
 434  * Reflect the timer output level into the kvm_run structure
 435  */
 436 void kvm_timer_update_run(struct kvm_vcpu *vcpu)
 437 {
 438         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 439         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 440         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
 441
 442         /* Populate the device bitmap with the timer states */
 443         regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
 444                                     KVM_ARM_DEV_EL1_PTIMER);
 445         if (kvm_timer_should_fire(vtimer))
 446                 regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
 447         if (kvm_timer_should_fire(ptimer))
 448                 regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
 449 }
 450
 451 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 452                                  struct arch_timer_context *timer_ctx)
 453 {
 454         int ret;
 455
 456         timer_ctx->irq.level = new_level;
 457         trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
 458                                    timer_ctx->irq.level);
 459
 460         if (!userspace_irqchip(vcpu->kvm)) {
 461                 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
 462                                           timer_irq(timer_ctx),
 463                                           timer_ctx->irq.level,
 464                                           timer_ctx);
 465                 WARN_ON(ret);
 466         }
 467 }
 468
 469 /* Only called for a fully emulated timer */
 470 static void timer_emulate(struct arch_timer_context *ctx)
 471 {
 472         bool should_fire = kvm_timer_should_fire(ctx);
 473
 474         trace_kvm_timer_emulate(ctx, should_fire);
 475
 476         if (should_fire != ctx->irq.level) {
 477                 kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
 478                 return;
 479         }
 480
 481         /*
 482          * If the timer can fire now, we don't need to have a soft timer
 483          * scheduled for the future.  If the timer cannot fire at all,
 484          * then we also don't need a soft timer.
 485          */
 486         if (should_fire || !kvm_timer_irq_can_fire(ctx))
 487                 return;
 488
 489         soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 490 }
 491
/*
 * Hand @cntvoff to __kvm_timer_set_cntvoff through kvm_call_hyp().
 * NOTE(review): presumably this programs the virtual counter offset
 * (CNTVOFF_EL2) - the callee is not visible here, confirm.
 */
static void set_cntvoff(u64 cntvoff)
{
        kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
 496
 497 static void set_cntpoff(u64 cntpoff)
 498 {
 499         if (has_cntpoff())
 500                 write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
 501 }
 502
/*
 * Save the hardware state of a loaded timer context into the vcpu's
 * register file, then disable the hardware timer and zero its counter
 * offset.
 *
 * Nothing happens if the vcpu's timer is not enabled, or if @ctx is not
 * currently loaded. The whole sequence runs with local interrupts disabled.
 */
static void timer_save_state(struct arch_timer_context *ctx)
{
        struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
        enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
        unsigned long flags;

        if (!timer->enabled)
                return;

        local_irq_save(flags);

        if (!ctx->loaded)
                goto out;

        switch (index) {
                u64 cval;

        case TIMER_VTIMER:
        case TIMER_HVTIMER:
                /* Both virtual timers are saved from the CNTV_* registers */
                timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
                timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));

                /* Disable the timer */
                write_sysreg_el0(0, SYS_CNTV_CTL);
                isb();

                /*
                 * The kernel may decide to run userspace after
                 * calling vcpu_put, so we reset cntvoff to 0 to
                 * ensure a consistent read between user accesses to
                 * the virtual counter and kernel access to the
                 * physical counter of non-VHE case.
                 *
                 * For VHE, the virtual counter uses a fixed virtual
                 * offset of zero, so no need to zero CNTVOFF_EL2
                 * register, but this is actually useful when switching
                 * between EL1/vEL2 with NV.
                 *
                 * Do it unconditionally, as this is either unavoidable
                 * or dirt cheap.
                 */
                set_cntvoff(0);
                break;
        case TIMER_PTIMER:
        case TIMER_HPTIMER:
                /* Both physical timers are saved from the CNTP_* registers */
                timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
                cval = read_sysreg_el0(SYS_CNTP_CVAL);

                /*
                 * Without CNTPOFF, timer_restore_state() adds the offset
                 * to the compare value it programs; take it out again so
                 * that the saved value does not include it.
                 */
                if (!has_cntpoff())
                        cval -= timer_get_offset(ctx);

                timer_set_cval(ctx, cval);

                /* Disable the timer */
                write_sysreg_el0(0, SYS_CNTP_CTL);
                isb();

                set_cntpoff(0);
                break;
        case NR_KVM_TIMERS:
                /* Not a valid timer index */
                BUG();
        }

        trace_kvm_timer_save_state(ctx);

        ctx->loaded = false;
out:
        local_irq_restore(flags);
}
 572
 573 /*
 574  * Schedule the background timer before calling kvm_vcpu_halt, so that this
 575  * thread is removed from its waitqueue and made runnable when there's a timer
 576  * interrupt to handle.
 577  */
 578 static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 579 {
 580         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 581         struct timer_map map;
 582
 583         get_timer_map(vcpu, &map);
 584
 585         /*
 586          * If no timers are capable of raising interrupts (disabled or
 587          * masked), then there's no more work for us to do.
 588          */
 589         if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
 590             !kvm_timer_irq_can_fire(map.direct_ptimer) &&
 591             !kvm_timer_irq_can_fire(map.emul_vtimer) &&
 592             !kvm_timer_irq_can_fire(map.emul_ptimer) &&
 593             !vcpu_has_wfit_active(vcpu))
 594                 return;
 595
 596         /*
 597          * At least one guest time will expire. Schedule a background timer.
 598          * Set the earliest expiration time among the guest timers.
 599          */
 600         soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 601 }
 602
 603 static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
 604 {
 605         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 606
 607         soft_timer_cancel(&timer->bg_timer);
 608 }
 609
/*
 * Load the saved state of a timer context into the hardware timer
 * registers: counter offset first, then the compare value, and finally the
 * control register.
 *
 * Nothing happens if the vcpu's timer is not enabled, or if @ctx is
 * already loaded. The whole sequence runs with local interrupts disabled.
 */
static void timer_restore_state(struct arch_timer_context *ctx)
{
        struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
        enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
        unsigned long flags;

        if (!timer->enabled)
                return;

        local_irq_save(flags);

        if (ctx->loaded)
                goto out;

        switch (index) {
                u64 cval, offset;

        case TIMER_VTIMER:
        case TIMER_HVTIMER:
                /* Both virtual timers are restored into the CNTV_* registers */
                set_cntvoff(timer_get_offset(ctx));
                write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
                isb();
                write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
                break;
        case TIMER_PTIMER:
        case TIMER_HPTIMER:
                /* Both physical timers are restored into the CNTP_* registers */
                cval = timer_get_cval(ctx);
                offset = timer_get_offset(ctx);
                set_cntpoff(offset);
                /*
                 * set_cntpoff() is a no-op without CNTPOFF, so fold the
                 * offset into the compare value instead.
                 */
                if (!has_cntpoff())
                        cval += offset;
                write_sysreg_el0(cval, SYS_CNTP_CVAL);
                isb();
                write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
                break;
        case NR_KVM_TIMERS:
                /* Not a valid timer index */
                BUG();
        }

        trace_kvm_timer_restore_state(ctx);

        ctx->loaded = true;
out:
        local_irq_restore(flags);
}
 655
 656 static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
 657 {
 658         int r;
 659         r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
 660         WARN_ON(r);
 661 }
 662
 663 static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 664 {
 665         struct kvm_vcpu *vcpu = ctx->vcpu;
 666         bool phys_active = false;
 667
 668         /*
 669          * Update the timer output so that it is likely to match the
 670          * state we're about to restore. If the timer expires between
 671          * this point and the register restoration, we'll take the
 672          * interrupt anyway.
 673          */
 674         kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
 675
 676         if (irqchip_in_kernel(vcpu->kvm))
 677                 phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
 678
 679         phys_active |= ctx->irq.level;
 680
 681         set_timer_irq_phys_active(ctx, phys_active);
 682 }
 683
 684 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 685 {
 686         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 687
 688         /*
 689          * Update the timer output so that it is likely to match the
 690          * state we're about to restore. If the timer expires between
 691          * this point and the register restoration, we'll take the
 692          * interrupt anyway.
 693          */
 694         kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
 695
 696         /*
 697          * When using a userspace irqchip with the architected timers and a
 698          * host interrupt controller that doesn't support an active state, we
 699          * must still prevent continuously exiting from the guest, and
 700          * therefore mask the physical interrupt by disabling it on the host
 701          * interrupt controller when the virtual level is high, such that the
 702          * guest can make forward progress.  Once we detect the output level
 703          * being de-asserted, we unmask the interrupt again so that we exit
 704          * from the guest when the timer fires.
 705          */
 706         if (vtimer->irq.level)
 707                 disable_percpu_irq(host_vtimer_irq);
 708         else
 709                 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 710 }
 711
/*
 * Accumulate _bit into one of two masks depending on _pred:
 * if _pred is true, OR it into _set, otherwise OR it into _clr.
 * Both _set and _clr must be modifiable lvalues; neither is reset here.
 */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)                   \
        do {                                                            \
                if (_pred)                                              \
                        (_set) |= (_bit);                               \
                else                                                    \
                        (_clr) |= (_bit);                               \
        } while (0)
 720
/*
 * Re-target the vgic physical interrupt mappings after a switch of the
 * direct/emulated timer roles described by @map.
 *
 * If the direct vtimer's interrupt currently has no physical mapping, the
 * mappings of both emulated timers are removed and both direct timers are
 * mapped to their host interrupts instead. Does nothing without an
 * in-kernel irqchip.
 */
static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
                                              struct timer_map *map)
{
        int hw, ret;

        if (!irqchip_in_kernel(vcpu->kvm))
                return;

        /*
         * We only ever unmap the vtimer irq on a VHE system that runs nested
         * virtualization, in which case we have both a valid emul_vtimer,
         * emul_ptimer, direct_vtimer, and direct_ptimer.
         *
         * Since this is called from kvm_timer_vcpu_load(), a change between
         * vEL2 and vEL1/0 will have just happened, and the timer_map will
         * represent this, and therefore we switch the emul/direct mappings
         * below.
         */
        hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
        if (hw < 0) {
                /* Drop the mappings of the timers that are now emulated... */
                kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
                kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

                /* ...and map the timers that are now direct */
                ret = kvm_vgic_map_phys_irq(vcpu,
                                            map->direct_vtimer->host_timer_irq,
                                            timer_irq(map->direct_vtimer),
                                            &arch_timer_irq_ops);
                WARN_ON_ONCE(ret);
                ret = kvm_vgic_map_phys_irq(vcpu,
                                            map->direct_ptimer->host_timer_irq,
                                            timer_irq(map->direct_ptimer),
                                            &arch_timer_irq_ops);
                WARN_ON_ONCE(ret);

                /*
                 * The virtual offset behaviour is "interesting", as it
                 * always applies when HCR_EL2.E2H==0, but only when
                 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
                 * track E2H when putting the HV timer in "direct" mode.
                 */
                if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
                        struct arch_timer_offset *offs = &map->direct_vtimer->offset;

                        /* A NULL vcpu_offset is ignored by timer_get_offset() */
                        if (vcpu_el2_e2h_is_set(vcpu))
                                offs->vcpu_offset = NULL;
                        else
                                offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
                }
        }
}
 771
/*
 * Compute and apply the EL1PCEN/EL1PCTEN bits of CNTHCTL_EL2 for @vcpu,
 * based on the physical offset handling and, with NV, on what the guest
 * hypervisor has configured for its own guest. VHE only.
 */
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
        bool tpt, tpc;
        u64 clr, set;

        /*
         * No trapping gets configured here with nVHE. See
         * __timer_enable_traps(), which is where the stuff happens.
         */
        if (!has_vhe())
                return;

        /*
         * Our default policy is not to trap anything. As we progress
         * within this function, reality kicks in and we start adding
         * traps based on emulation requirements.
         */
        tpt = tpc = false;

        /*
         * We have two possibilities to deal with a physical offset:
         *
         * - Either we have CNTPOFF (yay!) or the offset is 0:
         *   we let the guest freely access the HW
         *
         * - or neither of these conditions applies:
         *   we trap accesses to the HW, but still use it
         *   after correcting the physical offset
         */
        if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
                tpt = tpc = true;

        /*
         * Apply the enable bits that the guest hypervisor has requested for
         * its own guest. We can only add traps that wouldn't have been set
         * above.
         */
        if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
                u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

                /* Use the VHE format for mental sanity */
                if (!vcpu_el2_e2h_is_set(vcpu))
                        val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

                tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
                tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
        }

        /*
         * Now that we have collected our requirements, compute the
         * trap and enable bits.
         */
        set = 0;
        clr = 0;

        /*
         * Note the argument order: 'set' is passed in the macro's _clr
         * slot and 'clr' in its _set slot. A true trap predicate therefore
         * lands in 'clr' (the enable bit gets cleared) and a false one in
         * 'set' (the enable bit gets set).
         */
        assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
        assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);

        /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
        sysreg_clear_set(cnthctl_el2, clr, set);
}
 833
 834 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 835 {
 836         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 837         struct timer_map map;
 838
 839         if (unlikely(!timer->enabled))
 840                 return;
 841
 842         get_timer_map(vcpu, &map);
 843
 844         if (static_branch_likely(&has_gic_active_state)) {
 845                 if (vcpu_has_nv(vcpu))
 846                         kvm_timer_vcpu_load_nested_switch(vcpu, &map);
 847
 848                 kvm_timer_vcpu_load_gic(map.direct_vtimer);
 849                 if (map.direct_ptimer)
 850                         kvm_timer_vcpu_load_gic(map.direct_ptimer);
 851         } else {
 852                 kvm_timer_vcpu_load_nogic(vcpu);
 853         }
 854
 855         kvm_timer_unblocking(vcpu);
 856
 857         timer_restore_state(map.direct_vtimer);
 858         if (map.direct_ptimer)
 859                 timer_restore_state(map.direct_ptimer);
 860         if (map.emul_vtimer)
 861                 timer_emulate(map.emul_vtimer);
 862         if (map.emul_ptimer)
 863                 timer_emulate(map.emul_ptimer);
 864
 865         timer_set_traps(vcpu, &map);
 866 }
 867
 868 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 869 {
 870         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 871         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 872         struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
 873         bool vlevel, plevel;
 874
 875         if (likely(irqchip_in_kernel(vcpu->kvm)))
 876                 return false;
 877
 878         vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
 879         plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
 880
 881         return kvm_timer_should_fire(vtimer) != vlevel ||
 882                kvm_timer_should_fire(ptimer) != plevel;
 883 }
 884
 885 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 886 {
 887         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 888         struct timer_map map;
 889
 890         if (unlikely(!timer->enabled))
 891                 return;
 892
 893         get_timer_map(vcpu, &map);
 894
 895         timer_save_state(map.direct_vtimer);
 896         if (map.direct_ptimer)
 897                 timer_save_state(map.direct_ptimer);
 898
 899         /*
 900          * Cancel soft timer emulation, because the only case where we
 901          * need it after a vcpu_put is in the context of a sleeping VCPU, and
 902          * in that case we already factor in the deadline for the physical
 903          * timer when scheduling the bg_timer.
 904          *
 905          * In any case, we re-schedule the hrtimer for the physical timer when
 906          * coming back to the VCPU thread in kvm_timer_vcpu_load().
 907          */
 908         if (map.emul_vtimer)
 909                 soft_timer_cancel(&map.emul_vtimer->hrtimer);
 910         if (map.emul_ptimer)
 911                 soft_timer_cancel(&map.emul_ptimer->hrtimer);
 912
 913         if (kvm_vcpu_is_blocking(vcpu))
 914                 kvm_timer_blocking(vcpu);
 915 }
 916
 917 /*
 918  * With a userspace irqchip we have to check if the guest de-asserted the
 919  * timer and if so, unmask the timer irq signal on the host interrupt
 920  * controller to ensure that we see future timer signals.
 921  */
 922 static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 923 {
 924         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 925
 926         if (!kvm_timer_should_fire(vtimer)) {
 927                 kvm_timer_update_irq(vcpu, false, vtimer);
 928                 if (static_branch_likely(&has_gic_active_state))
 929                         set_timer_irq_phys_active(vtimer, false);
 930                 else
 931                         enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 932         }
 933 }
 934
 935 void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
 936 {
 937         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 938
 939         if (unlikely(!timer->enabled))
 940                 return;
 941
 942         if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 943                 unmask_vtimer_irq_user(vcpu);
 944 }
 945
 946 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 947 {
 948         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 949         struct timer_map map;
 950
 951         get_timer_map(vcpu, &map);
 952
 953         /*
 954          * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
 955          * and to 0 for ARMv7.  We provide an implementation that always
 956          * resets the timer to be disabled and unmasked and is compliant with
 957          * the ARMv7 architecture.
 958          */
 959         for (int i = 0; i < nr_timers(vcpu); i++)
 960                 timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
 961
 962         /*
 963          * A vcpu running at EL2 is in charge of the offset applied to
 964          * the virtual timer, so use the physical VM offset, and point
 965          * the vcpu offset to CNTVOFF_EL2.
 966          */
 967         if (vcpu_has_nv(vcpu)) {
 968                 struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
 969
 970                 offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
 971                 offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
 972         }
 973
 974         if (timer->enabled) {
 975                 for (int i = 0; i < nr_timers(vcpu); i++)
 976                         kvm_timer_update_irq(vcpu, false,
 977                                              vcpu_get_timer(vcpu, i));
 978
 979                 if (irqchip_in_kernel(vcpu->kvm)) {
 980                         kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
 981                         if (map.direct_ptimer)
 982                                 kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
 983                 }
 984         }
 985
 986         if (map.emul_vtimer)
 987                 soft_timer_cancel(&map.emul_vtimer->hrtimer);
 988         if (map.emul_ptimer)
 989                 soft_timer_cancel(&map.emul_ptimer->hrtimer);
 990
 991         return 0;
 992 }
 993
 994 static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
 995 {
 996         struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
 997         struct kvm *kvm = vcpu->kvm;
 998
 999         ctxt->vcpu = vcpu;
1000
1001         if (timerid == TIMER_VTIMER)
1002                 ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
1003         else
1004                 ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
1005
1006         hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1007         ctxt->hrtimer.function = kvm_hrtimer_expire;
1008
1009         switch (timerid) {
1010         case TIMER_PTIMER:
1011         case TIMER_HPTIMER:
1012                 ctxt->host_timer_irq = host_ptimer_irq;
1013                 break;
1014         case TIMER_VTIMER:
1015         case TIMER_HVTIMER:
1016                 ctxt->host_timer_irq = host_vtimer_irq;
1017                 break;
1018         }
1019 }
1020
1021 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
1022 {
1023         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1024
1025         for (int i = 0; i < NR_KVM_TIMERS; i++)
1026                 timer_context_init(vcpu, i);
1027
1028         /* Synchronize offsets across timers of a VM if not already provided */
1029         if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
1030                 timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
1031                 timer_set_offset(vcpu_ptimer(vcpu), 0);
1032         }
1033
1034         hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1035         timer->bg_timer.function = kvm_bg_timer_expire;
1036 }
1037
1038 void kvm_timer_init_vm(struct kvm *kvm)
1039 {
1040         for (int i = 0; i < NR_KVM_TIMERS; i++)
1041                 kvm->arch.timer_data.ppi[i] = default_ppi[i];
1042 }
1043
1044 void kvm_timer_cpu_up(void)
1045 {
1046         enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
1047         if (host_ptimer_irq)
1048                 enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
1049 }
1050
1051 void kvm_timer_cpu_down(void)
1052 {
1053         disable_percpu_irq(host_vtimer_irq);
1054         if (host_ptimer_irq)
1055                 disable_percpu_irq(host_ptimer_irq);
1056 }
1057
1058 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
1059 {
1060         struct arch_timer_context *timer;
1061
1062         switch (regid) {
1063         case KVM_REG_ARM_TIMER_CTL:
1064                 timer = vcpu_vtimer(vcpu);
1065                 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
1066                 break;
1067         case KVM_REG_ARM_TIMER_CNT:
1068                 if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
1069                               &vcpu->kvm->arch.flags)) {
1070                         timer = vcpu_vtimer(vcpu);
1071                         timer_set_offset(timer, kvm_phys_timer_read() - value);
1072                 }
1073                 break;
1074         case KVM_REG_ARM_TIMER_CVAL:
1075                 timer = vcpu_vtimer(vcpu);
1076                 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
1077                 break;
1078         case KVM_REG_ARM_PTIMER_CTL:
1079                 timer = vcpu_ptimer(vcpu);
1080                 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
1081                 break;
1082         case KVM_REG_ARM_PTIMER_CNT:
1083                 if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
1084                               &vcpu->kvm->arch.flags)) {
1085                         timer = vcpu_ptimer(vcpu);
1086                         timer_set_offset(timer, kvm_phys_timer_read() - value);
1087                 }
1088                 break;
1089         case KVM_REG_ARM_PTIMER_CVAL:
1090                 timer = vcpu_ptimer(vcpu);
1091                 kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
1092                 break;
1093
1094         default:
1095                 return -1;
1096         }
1097
1098         return 0;
1099 }
1100
1101 static u64 read_timer_ctl(struct arch_timer_context *timer)
1102 {
1103         /*
1104          * Set ISTATUS bit if it's expired.
1105          * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
1106          * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
1107          * regardless of ENABLE bit for our implementation convenience.
1108          */
1109         u32 ctl = timer_get_ctl(timer);
1110
1111         if (!kvm_timer_compute_delta(timer))
1112                 ctl |= ARCH_TIMER_CTRL_IT_STAT;
1113
1114         return ctl;
1115 }
1116
1117 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
1118 {
1119         switch (regid) {
1120         case KVM_REG_ARM_TIMER_CTL:
1121                 return kvm_arm_timer_read(vcpu,
1122                                           vcpu_vtimer(vcpu), TIMER_REG_CTL);
1123         case KVM_REG_ARM_TIMER_CNT:
1124                 return kvm_arm_timer_read(vcpu,
1125                                           vcpu_vtimer(vcpu), TIMER_REG_CNT);
1126         case KVM_REG_ARM_TIMER_CVAL:
1127                 return kvm_arm_timer_read(vcpu,
1128                                           vcpu_vtimer(vcpu), TIMER_REG_CVAL);
1129         case KVM_REG_ARM_PTIMER_CTL:
1130                 return kvm_arm_timer_read(vcpu,
1131                                           vcpu_ptimer(vcpu), TIMER_REG_CTL);
1132         case KVM_REG_ARM_PTIMER_CNT:
1133                 return kvm_arm_timer_read(vcpu,
1134                                           vcpu_ptimer(vcpu), TIMER_REG_CNT);
1135         case KVM_REG_ARM_PTIMER_CVAL:
1136                 return kvm_arm_timer_read(vcpu,
1137                                           vcpu_ptimer(vcpu), TIMER_REG_CVAL);
1138         }
1139         return (u64)-1;
1140 }
1141
1142 static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
1143                               struct arch_timer_context *timer,
1144                               enum kvm_arch_timer_regs treg)
1145 {
1146         u64 val;
1147
1148         switch (treg) {
1149         case TIMER_REG_TVAL:
1150                 val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
1151                 val = lower_32_bits(val);
1152                 break;
1153
1154         case TIMER_REG_CTL:
1155                 val = read_timer_ctl(timer);
1156                 break;
1157
1158         case TIMER_REG_CVAL:
1159                 val = timer_get_cval(timer);
1160                 break;
1161
1162         case TIMER_REG_CNT:
1163                 val = kvm_phys_timer_read() - timer_get_offset(timer);
1164                 break;
1165
1166         case TIMER_REG_VOFF:
1167                 val = *timer->offset.vcpu_offset;
1168                 break;
1169
1170         default:
1171                 BUG();
1172         }
1173
1174         return val;
1175 }
1176
1177 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
1178                               enum kvm_arch_timers tmr,
1179                               enum kvm_arch_timer_regs treg)
1180 {
1181         struct arch_timer_context *timer;
1182         struct timer_map map;
1183         u64 val;
1184
1185         get_timer_map(vcpu, &map);
1186         timer = vcpu_get_timer(vcpu, tmr);
1187
1188         if (timer == map.emul_vtimer || timer == map.emul_ptimer)
1189                 return kvm_arm_timer_read(vcpu, timer, treg);
1190
1191         preempt_disable();
1192         timer_save_state(timer);
1193
1194         val = kvm_arm_timer_read(vcpu, timer, treg);
1195
1196         timer_restore_state(timer);
1197         preempt_enable();
1198
1199         return val;
1200 }
1201
1202 static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
1203                                 struct arch_timer_context *timer,
1204                                 enum kvm_arch_timer_regs treg,
1205                                 u64 val)
1206 {
1207         switch (treg) {
1208         case TIMER_REG_TVAL:
1209                 timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
1210                 break;
1211
1212         case TIMER_REG_CTL:
1213                 timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
1214                 break;
1215
1216         case TIMER_REG_CVAL:
1217                 timer_set_cval(timer, val);
1218                 break;
1219
1220         case TIMER_REG_VOFF:
1221                 *timer->offset.vcpu_offset = val;
1222                 break;
1223
1224         default:
1225                 BUG();
1226         }
1227 }
1228
1229 void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
1230                                 enum kvm_arch_timers tmr,
1231                                 enum kvm_arch_timer_regs treg,
1232                                 u64 val)
1233 {
1234         struct arch_timer_context *timer;
1235         struct timer_map map;
1236
1237         get_timer_map(vcpu, &map);
1238         timer = vcpu_get_timer(vcpu, tmr);
1239         if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
1240                 soft_timer_cancel(&timer->hrtimer);
1241                 kvm_arm_timer_write(vcpu, timer, treg, val);
1242                 timer_emulate(timer);
1243         } else {
1244                 preempt_disable();
1245                 timer_save_state(timer);
1246                 kvm_arm_timer_write(vcpu, timer, treg, val);
1247                 timer_restore_state(timer);
1248                 preempt_enable();
1249         }
1250 }
1251
/*
 * Record on @d whether the interrupt is forwarded to a vcpu: a NULL
 * @vcpu clears the forwarded state, anything else sets it.
 * Always returns 0.
 */
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (!vcpu) {
		irqd_clr_forwarded_to_vcpu(d);
		return 0;
	}

	irqd_set_forwarded_to_vcpu(d);
	return 0;
}
1261
1262 static int timer_irq_set_irqchip_state(struct irq_data *d,
1263                                        enum irqchip_irq_state which, bool val)
1264 {
1265         if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
1266                 return irq_chip_set_parent_state(d, which, val);
1267
1268         if (val)
1269                 irq_chip_mask_parent(d);
1270         else
1271                 irq_chip_unmask_parent(d);
1272
1273         return 0;
1274 }
1275
/* EOI is only propagated to the parent when @d is not forwarded to a vcpu. */
static void timer_irq_eoi(struct irq_data *d)
{
	if (irqd_is_forwarded_to_vcpu(d))
		return;

	irq_chip_eoi_parent(d);
}
1281
1282 static void timer_irq_ack(struct irq_data *d)
1283 {
1284         d = d->parent_data;
1285         if (d->chip->irq_ack)
1286                 d->chip->irq_ack(d);
1287 }
1288
1289 static struct irq_chip timer_chip = {
1290         .name                   = "KVM",
1291         .irq_ack                = timer_irq_ack,
1292         .irq_mask               = irq_chip_mask_parent,
1293         .irq_unmask             = irq_chip_unmask_parent,
1294         .irq_eoi                = timer_irq_eoi,
1295         .irq_set_type           = irq_chip_set_type_parent,
1296         .irq_set_vcpu_affinity  = timer_irq_set_vcpu_affinity,
1297         .irq_set_irqchip_state  = timer_irq_set_irqchip_state,
1298 };
1299
1300 static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1301                                   unsigned int nr_irqs, void *arg)
1302 {
1303         irq_hw_number_t hwirq = (uintptr_t)arg;
1304
1305         return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
1306                                              &timer_chip, NULL);
1307 }
1308
/* Domain .free callback: intentionally empty. */
static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
	/* Nothing to do */
}
1313
1314 static const struct irq_domain_ops timer_domain_ops = {
1315         .alloc  = timer_irq_domain_alloc,
1316         .free   = timer_irq_domain_free,
1317 };
1318
1319 static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
1320 {
1321         *flags = irq_get_trigger_type(virq);
1322         if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
1323                 kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
1324                         virq);
1325                 *flags = IRQF_TRIGGER_LOW;
1326         }
1327 }
1328
1329 static int kvm_irq_init(struct arch_timer_kvm_info *info)
1330 {
1331         struct irq_domain *domain = NULL;
1332
1333         if (info->virtual_irq <= 0) {
1334                 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
1335                         info->virtual_irq);
1336                 return -ENODEV;
1337         }
1338
1339         host_vtimer_irq = info->virtual_irq;
1340         kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
1341
1342         if (kvm_vgic_global_state.no_hw_deactivation) {
1343                 struct fwnode_handle *fwnode;
1344                 struct irq_data *data;
1345
1346                 fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
1347                 if (!fwnode)
1348                         return -ENOMEM;
1349
1350                 /* Assume both vtimer and ptimer in the same parent */
1351                 data = irq_get_irq_data(host_vtimer_irq);
1352                 domain = irq_domain_create_hierarchy(data->domain, 0,
1353                                                      NR_KVM_TIMERS, fwnode,
1354                                                      &timer_domain_ops, NULL);
1355                 if (!domain) {
1356                         irq_domain_free_fwnode(fwnode);
1357                         return -ENOMEM;
1358                 }
1359
1360                 arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
1361                 WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
1362                                             (void *)TIMER_VTIMER));
1363         }
1364
1365         if (info->physical_irq > 0) {
1366                 host_ptimer_irq = info->physical_irq;
1367                 kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
1368
1369                 if (domain)
1370                         WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
1371                                                     (void *)TIMER_PTIMER));
1372         }
1373
1374         return 0;
1375 }
1376
1377 int __init kvm_timer_hyp_init(bool has_gic)
1378 {
1379         struct arch_timer_kvm_info *info;
1380         int err;
1381
1382         info = arch_timer_get_kvm_info();
1383         timecounter = &info->timecounter;
1384
1385         if (!timecounter->cc) {
1386                 kvm_err("kvm_arch_timer: uninitialized timecounter\n");
1387                 return -ENODEV;
1388         }
1389
1390         err = kvm_irq_init(info);
1391         if (err)
1392                 return err;
1393
1394         /* First, do the virtual EL1 timer irq */
1395
1396         err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
1397                                  "kvm guest vtimer", kvm_get_running_vcpus());
1398         if (err) {
1399                 kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
1400                         host_vtimer_irq, err);
1401                 return err;
1402         }
1403
1404         if (has_gic) {
1405                 err = irq_set_vcpu_affinity(host_vtimer_irq,
1406                                             kvm_get_running_vcpus());
1407                 if (err) {
1408                         kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1409                         goto out_free_vtimer_irq;
1410                 }
1411
1412                 static_branch_enable(&has_gic_active_state);
1413         }
1414
1415         kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
1416
1417         /* Now let's do the physical EL1 timer irq */
1418
1419         if (info->physical_irq > 0) {
1420                 err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
1421                                          "kvm guest ptimer", kvm_get_running_vcpus());
1422                 if (err) {
1423                         kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
1424                                 host_ptimer_irq, err);
1425                         goto out_free_vtimer_irq;
1426                 }
1427
1428                 if (has_gic) {
1429                         err = irq_set_vcpu_affinity(host_ptimer_irq,
1430                                                     kvm_get_running_vcpus());
1431                         if (err) {
1432                                 kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
1433                                 goto out_free_ptimer_irq;
1434                         }
1435                 }
1436
1437                 kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
1438         } else if (has_vhe()) {
1439                 kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
1440                         info->physical_irq);
1441                 err = -ENODEV;
1442                 goto out_free_vtimer_irq;
1443         }
1444
1445         return 0;
1446
1447 out_free_ptimer_irq:
1448         if (info->physical_irq > 0)
1449                 free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
1450 out_free_vtimer_irq:
1451         free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
1452         return err;
1453 }
1454
1455 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
1456 {
1457         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1458
1459         soft_timer_cancel(&timer->bg_timer);
1460 }
1461
1462 static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
1463 {
1464         u32 ppis = 0;
1465         bool valid;
1466
1467         mutex_lock(&vcpu->kvm->arch.config_lock);
1468
1469         for (int i = 0; i < nr_timers(vcpu); i++) {
1470                 struct arch_timer_context *ctx;
1471                 int irq;
1472
1473                 ctx = vcpu_get_timer(vcpu, i);
1474                 irq = timer_irq(ctx);
1475                 if (kvm_vgic_set_owner(vcpu, irq, ctx))
1476                         break;
1477
1478                 /*
1479                  * We know by construction that we only have PPIs, so
1480                  * all values are less than 32.
1481                  */
1482                 ppis |= BIT(irq);
1483         }
1484
1485         valid = hweight32(ppis) == nr_timers(vcpu);
1486
1487         if (valid)
1488                 set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
1489
1490         mutex_unlock(&vcpu->kvm->arch.config_lock);
1491
1492         return valid;
1493 }
1494
1495 static bool kvm_arch_timer_get_input_level(int vintid)
1496 {
1497         struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
1498
1499         if (WARN(!vcpu, "No vcpu context!\n"))
1500                 return false;
1501
1502         for (int i = 0; i < nr_timers(vcpu); i++) {
1503                 struct arch_timer_context *ctx;
1504
1505                 ctx = vcpu_get_timer(vcpu, i);
1506                 if (timer_irq(ctx) == vintid)
1507                         return kvm_timer_should_fire(ctx);
1508         }
1509
1510         /* A timer IRQ has fired, but no matching timer was found? */
1511         WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
1512
1513         return false;
1514 }
1515
1516 int kvm_timer_enable(struct kvm_vcpu *vcpu)
1517 {
1518         struct arch_timer_cpu *timer = vcpu_timer(vcpu);
1519         struct timer_map map;
1520         int ret;
1521
1522         if (timer->enabled)
1523                 return 0;
1524
1525         /* Without a VGIC we do not map virtual IRQs to physical IRQs */
1526         if (!irqchip_in_kernel(vcpu->kvm))
1527                 goto no_vgic;
1528
1529         /*
1530          * At this stage, we have the guarantee that the vgic is both
1531          * available and initialized.
1532          */
1533         if (!timer_irqs_are_valid(vcpu)) {
1534                 kvm_debug("incorrectly configured timer irqs\n");
1535                 return -EINVAL;
1536         }
1537
1538         get_timer_map(vcpu, &map);
1539
1540         ret = kvm_vgic_map_phys_irq(vcpu,
1541                                     map.direct_vtimer->host_timer_irq,
1542                                     timer_irq(map.direct_vtimer),
1543                                     &arch_timer_irq_ops);
1544         if (ret)
1545                 return ret;
1546
1547         if (map.direct_ptimer) {
1548                 ret = kvm_vgic_map_phys_irq(vcpu,
1549                                             map.direct_ptimer->host_timer_irq,
1550                                             timer_irq(map.direct_ptimer),
1551                                             &arch_timer_irq_ops);
1552         }
1553
1554         if (ret)
1555                 return ret;
1556
1557 no_vgic:
1558         timer->enabled = 1;
1559         return 0;
1560 }
1561
1562 /* If we have CNTPOFF, permanently set ECV to enable it */
1563 void kvm_timer_init_vhe(void)
1564 {
1565         if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
1566                 sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
1567 }
1568
1569 int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1570 {
1571         int __user *uaddr = (int __user *)(long)attr->addr;
1572         int irq, idx, ret = 0;
1573
1574         if (!irqchip_in_kernel(vcpu->kvm))
1575                 return -EINVAL;
1576
1577         if (get_user(irq, uaddr))
1578                 return -EFAULT;
1579
1580         if (!(irq_is_ppi(irq)))
1581                 return -EINVAL;
1582
1583         mutex_lock(&vcpu->kvm->arch.config_lock);
1584
1585         if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
1586                      &vcpu->kvm->arch.flags)) {
1587                 ret = -EBUSY;
1588                 goto out;
1589         }
1590
1591         switch (attr->attr) {
1592         case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1593                 idx = TIMER_VTIMER;
1594                 break;
1595         case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1596                 idx = TIMER_PTIMER;
1597                 break;
1598         case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1599                 idx = TIMER_HVTIMER;
1600                 break;
1601         case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1602                 idx = TIMER_HPTIMER;
1603                 break;
1604         default:
1605                 ret = -ENXIO;
1606                 goto out;
1607         }
1608
1609         /*
1610          * We cannot validate the IRQ unicity before we run, so take it at
1611          * face value. The verdict will be given on first vcpu run, for each
1612          * vcpu. Yes this is late. Blame it on the stupid API.
1613          */
1614         vcpu->kvm->arch.timer_data.ppi[idx] = irq;
1615
1616 out:
1617         mutex_unlock(&vcpu->kvm->arch.config_lock);
1618         return ret;
1619 }
1620
1621 int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1622 {
1623         int __user *uaddr = (int __user *)(long)attr->addr;
1624         struct arch_timer_context *timer;
1625         int irq;
1626
1627         switch (attr->attr) {
1628         case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1629                 timer = vcpu_vtimer(vcpu);
1630                 break;
1631         case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1632                 timer = vcpu_ptimer(vcpu);
1633                 break;
1634         case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1635                 timer = vcpu_hvtimer(vcpu);
1636                 break;
1637         case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1638                 timer = vcpu_hptimer(vcpu);
1639                 break;
1640         default:
1641                 return -ENXIO;
1642         }
1643
1644         irq = timer_irq(timer);
1645         return put_user(irq, uaddr);
1646 }
1647
1648 int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1649 {
1650         switch (attr->attr) {
1651         case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
1652         case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
1653         case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
1654         case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
1655                 return 0;
1656         }
1657
1658         return -ENXIO;
1659 }
1660
1661 int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
1662                                     struct kvm_arm_counter_offset *offset)
1663 {
1664         int ret = 0;
1665
1666         if (offset->reserved)
1667                 return -EINVAL;
1668
1669         mutex_lock(&kvm->lock);
1670
1671         if (lock_all_vcpus(kvm)) {
1672                 set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
1673
1674                 /*
1675                  * If userspace decides to set the offset using this
1676                  * API rather than merely restoring the counter
1677                  * values, the offset applies to both the virtual and
1678                  * physical views.
1679                  */
1680                 kvm->arch.timer_data.voffset = offset->counter_offset;
1681                 kvm->arch.timer_data.poffset = offset->counter_offset;
1682
1683                 unlock_all_vcpus(kvm);
1684         } else {
1685                 ret = -EBUSY;
1686         }
1687
1688         mutex_unlock(&kvm->lock);
1689
1690         return ret;
1691 }