Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 19 Mar 2016 17:05:34 +0000 (10:05 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 19 Mar 2016 17:05:34 +0000 (10:05 -0700)
Pull networking updates from David Miller:
 "Highlights:

   1) Support more Realtek wireless chips, from Jes Sorensen.

   2) New BPF types for per-cpu hash and array maps, from Alexei
      Starovoitov.

   3) Make several TCP sysctls per-namespace, from Nikolay Borisov.

   4) Allow the use of SO_REUSEPORT in order to do per-thread processing
      of incoming TCP/UDP connections.  The muxing can be done using a
      BPF program which hashes the incoming packet.  From Craig Gallek.

   5) Add a multiplexer for TCP streams, to provide a message-based
      interface.  BPF programs can be used to determine the message
      boundaries.  From Tom Herbert.

   6) Add 802.1AE MACSEC support, from Sabrina Dubroca.

   7) Avoid factorial complexity when taking down an inetdev interface
      with lots of configured addresses.  We were doing things like
      traversing the entire address list for each address removed, and
      flushing the entire netfilter conntrack table for every address as
      well.

   8) Add and use SKB bulk free infrastructure, from Jesper Brouer.

   9) Allow offloading u32 classifiers to hardware, and implement for
      ixgbe, from John Fastabend.

  10) Allow configuring IRQ coalescing parameters on a per-queue basis,
      from Kan Liang.

  11) Extend ethtool so that larger link mode masks can be supported.
      From David Decotigny.

  12) Introduce devlink, which can be used to configure port link types
      (ethernet vs Infiniband, etc.), port splitting, and switch device
      level attributes as a whole.  From Jiri Pirko.

  13) Hardware offload support for flower classifiers, from Amir Vadai.

  14) Add "Local Checksum Offload".  Basically, for a tunneled packet
      the checksum of the outer header is 'constant' (because with the
      checksum field filled into the inner protocol header, the payload
      of the outer frame checksums to 'zero'), and we can take advantage
      of that in various ways.  From Edward Cree."

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits)
  bonding: fix bond_get_stats()
  net: bcmgenet: fix dma api length mismatch
  net/mlx4_core: Fix backward compatibility on VFs
  phy: mdio-thunder: Fix some Kconfig typos
  lan78xx: add ndo_get_stats64
  lan78xx: handle statistics counter rollover
  RDS: TCP: Remove unused constant
  RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
  net: smc911x: convert pxa dma to dmaengine
  team: remove duplicate set of flag IFF_MULTICAST
  bonding: remove duplicate set of flag IFF_MULTICAST
  net: fix a comment typo
  ethernet: micrel: fix some error codes
  ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it
  bpf, dst: add and use dst_tclassid helper
  bpf: make skb->tc_classid also readable
  net: mvneta: bm: clarify dependencies
  cls_bpf: reset class and reuse major in da
  ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c
  ldmvsw: Add ldmvsw.c driver code
  ...

42 files changed:
1  2 
Documentation/devicetree/bindings/vendor-prefixes.txt
MAINTAINERS
arch/arm/boot/dts/armada-xp-db.dts
arch/arm/boot/dts/armada-xp-gp.dts
arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
arch/arm64/boot/dts/apm/apm-storm.dtsi
arch/tile/configs/tilegx_defconfig
arch/tile/configs/tilepro_defconfig
arch/x86/events/core.c
arch/x86/kernel/dumpstack.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/main.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/wireless/marvell/mwifiex/debugfs.c
include/linux/kernel.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/perf_event.h
include/net/sctp/structs.h
include/net/tcp.h
include/uapi/linux/Kbuild
lib/Kconfig.debug
net/core/sock.c
net/ipv4/syncookies.c
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_ipv4.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c
net/rxrpc/ar-internal.h
net/rxrpc/rxkad.c
net/sctp/sm_make_chunk.c
net/sctp/socket.c

diff --cc MAINTAINERS
Simple merge
index ebe1d267406df5ab30e3a3189b669733eb8fcaa4,30657302305d737375014d9ff363b3a67d54f2bd..cca366590561077b2fd5aeeb358c97eced2bfa66
@@@ -76,8 -76,9 +76,9 @@@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
                          MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
 -                        MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
 -                        MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
 +                        MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
++                        MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
  
                devbus-bootcs {
                        status = "okay";
index 5730b875c4f51a1aa2743d8881b18d6dbc0b27cd,a1ded01d0c0762da87f81c27c14e1377811e2d3f..061f4237760e7c917d11056b97b5971bcd4680b5
@@@ -95,8 -95,9 +95,9 @@@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
                          MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
 -                        MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
 -                        MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
 +                        MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
++                        MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
  
                devbus-bootcs {
                        status = "okay";
index 853bd392a4fe20155ed1469a23175213814ca9dc,3aa29a91c7b8c5b4b17af8fe1fecae2cf04e3c8b..ed3b889d16ce439a4e933cdcd17ff37ed6adc4c4
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 -                        MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000
 -                        MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
 -                        MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
 +                        MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
 +                        MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
++                        MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0 0xd1200000 0x100000>;
  
                devbus-bootcs {
                        status = "okay";
Simple merge
Simple merge
index 5e830d0c95c999780bae39ecc1ce84c003fa1ece,0000000000000000000000000000000000000000..9b6ad08aa51a7e2ebc432e38b54b19b2021f266f
mode 100644,000000..100644
--- /dev/null
@@@ -1,2442 -1,0 +1,2442 @@@
- static void backtrace_address(void *data, unsigned long addr, int reliable)
 +/*
 + * Performance events x86 architecture code
 + *
 + *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 + *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 + *  Copyright (C) 2009 Jaswinder Singh Rajput
 + *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 + *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 + *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 + *  Copyright (C) 2009 Google, Inc., Stephane Eranian
 + *
 + *  For licencing details see kernel-base/COPYING
 + */
 +
 +#include <linux/perf_event.h>
 +#include <linux/capability.h>
 +#include <linux/notifier.h>
 +#include <linux/hardirq.h>
 +#include <linux/kprobes.h>
 +#include <linux/module.h>
 +#include <linux/kdebug.h>
 +#include <linux/sched.h>
 +#include <linux/uaccess.h>
 +#include <linux/slab.h>
 +#include <linux/cpu.h>
 +#include <linux/bitops.h>
 +#include <linux/device.h>
 +
 +#include <asm/apic.h>
 +#include <asm/stacktrace.h>
 +#include <asm/nmi.h>
 +#include <asm/smp.h>
 +#include <asm/alternative.h>
 +#include <asm/mmu_context.h>
 +#include <asm/tlbflush.h>
 +#include <asm/timer.h>
 +#include <asm/desc.h>
 +#include <asm/ldt.h>
 +
 +#include "perf_event.h"
 +
 +struct x86_pmu x86_pmu __read_mostly;
 +
 +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 +      .enabled = 1,
 +};
 +
 +struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
 +
 +u64 __read_mostly hw_cache_event_ids
 +                              [PERF_COUNT_HW_CACHE_MAX]
 +                              [PERF_COUNT_HW_CACHE_OP_MAX]
 +                              [PERF_COUNT_HW_CACHE_RESULT_MAX];
 +u64 __read_mostly hw_cache_extra_regs
 +                              [PERF_COUNT_HW_CACHE_MAX]
 +                              [PERF_COUNT_HW_CACHE_OP_MAX]
 +                              [PERF_COUNT_HW_CACHE_RESULT_MAX];
 +
 +/*
 + * Propagate event elapsed time into the generic event.
 + * Can only be executed on the CPU where the event is active.
 + * Returns the delta events processed.
 + */
 +u64 x86_perf_event_update(struct perf_event *event)
 +{
 +      struct hw_perf_event *hwc = &event->hw;
 +      int shift = 64 - x86_pmu.cntval_bits;
 +      u64 prev_raw_count, new_raw_count;
 +      int idx = hwc->idx;
 +      s64 delta;
 +
 +      if (idx == INTEL_PMC_IDX_FIXED_BTS)
 +              return 0;
 +
 +      /*
 +       * Careful: an NMI might modify the previous event value.
 +       *
 +       * Our tactic to handle this is to first atomically read and
 +       * exchange a new raw count - then add that new-prev delta
 +       * count to the generic event atomically:
 +       */
 +again:
 +      prev_raw_count = local64_read(&hwc->prev_count);
 +      rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 +
 +      if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 +                                      new_raw_count) != prev_raw_count)
 +              goto again;
 +
 +      /*
 +       * Now we have the new raw value and have updated the prev
 +       * timestamp already. We can now calculate the elapsed delta
 +       * (event-)time and add that to the generic event.
 +       *
 +       * Careful, not all hw sign-extends above the physical width
 +       * of the count.
 +       */
 +      delta = (new_raw_count << shift) - (prev_raw_count << shift);
 +      delta >>= shift;
 +
 +      local64_add(delta, &event->count);
 +      local64_sub(delta, &hwc->period_left);
 +
 +      return new_raw_count;
 +}
 +
 +/*
 + * Find and validate any extra registers to set up.
 + */
 +static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 +{
 +      struct hw_perf_event_extra *reg;
 +      struct extra_reg *er;
 +
 +      reg = &event->hw.extra_reg;
 +
 +      if (!x86_pmu.extra_regs)
 +              return 0;
 +
 +      for (er = x86_pmu.extra_regs; er->msr; er++) {
 +              if (er->event != (config & er->config_mask))
 +                      continue;
 +              if (event->attr.config1 & ~er->valid_mask)
 +                      return -EINVAL;
 +              /* Check if the extra msrs can be safely accessed*/
 +              if (!er->extra_msr_access)
 +                      return -ENXIO;
 +
 +              reg->idx = er->idx;
 +              reg->config = event->attr.config1;
 +              reg->reg = er->msr;
 +              break;
 +      }
 +      return 0;
 +}
 +
 +static atomic_t active_events;
 +static atomic_t pmc_refcount;
 +static DEFINE_MUTEX(pmc_reserve_mutex);
 +
 +#ifdef CONFIG_X86_LOCAL_APIC
 +
 +static bool reserve_pmc_hardware(void)
 +{
 +      int i;
 +
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
 +                      goto perfctr_fail;
 +      }
 +
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
 +                      goto eventsel_fail;
 +      }
 +
 +      return true;
 +
 +eventsel_fail:
 +      for (i--; i >= 0; i--)
 +              release_evntsel_nmi(x86_pmu_config_addr(i));
 +
 +      i = x86_pmu.num_counters;
 +
 +perfctr_fail:
 +      for (i--; i >= 0; i--)
 +              release_perfctr_nmi(x86_pmu_event_addr(i));
 +
 +      return false;
 +}
 +
 +static void release_pmc_hardware(void)
 +{
 +      int i;
 +
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              release_perfctr_nmi(x86_pmu_event_addr(i));
 +              release_evntsel_nmi(x86_pmu_config_addr(i));
 +      }
 +}
 +
 +#else
 +
 +static bool reserve_pmc_hardware(void) { return true; }
 +static void release_pmc_hardware(void) {}
 +
 +#endif
 +
 +static bool check_hw_exists(void)
 +{
 +      u64 val, val_fail, val_new= ~0;
 +      int i, reg, reg_fail, ret = 0;
 +      int bios_fail = 0;
 +      int reg_safe = -1;
 +
 +      /*
 +       * Check to see if the BIOS enabled any of the counters, if so
 +       * complain and bail.
 +       */
 +      for (i = 0; i < x86_pmu.num_counters; i++) {
 +              reg = x86_pmu_config_addr(i);
 +              ret = rdmsrl_safe(reg, &val);
 +              if (ret)
 +                      goto msr_fail;
 +              if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
 +                      bios_fail = 1;
 +                      val_fail = val;
 +                      reg_fail = reg;
 +              } else {
 +                      reg_safe = i;
 +              }
 +      }
 +
 +      if (x86_pmu.num_counters_fixed) {
 +              reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 +              ret = rdmsrl_safe(reg, &val);
 +              if (ret)
 +                      goto msr_fail;
 +              for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
 +                      if (val & (0x03 << i*4)) {
 +                              bios_fail = 1;
 +                              val_fail = val;
 +                              reg_fail = reg;
 +                      }
 +              }
 +      }
 +
 +      /*
 +       * If all the counters are enabled, the below test will always
 +       * fail.  The tools will also become useless in this scenario.
 +       * Just fail and disable the hardware counters.
 +       */
 +
 +      if (reg_safe == -1) {
 +              reg = reg_safe;
 +              goto msr_fail;
 +      }
 +
 +      /*
 +       * Read the current value, change it and read it back to see if it
 +       * matches, this is needed to detect certain hardware emulators
 +       * (qemu/kvm) that don't trap on the MSR access and always return 0s.
 +       */
 +      reg = x86_pmu_event_addr(reg_safe);
 +      if (rdmsrl_safe(reg, &val))
 +              goto msr_fail;
 +      val ^= 0xffffUL;
 +      ret = wrmsrl_safe(reg, val);
 +      ret |= rdmsrl_safe(reg, &val_new);
 +      if (ret || val != val_new)
 +              goto msr_fail;
 +
 +      /*
 +       * We still allow the PMU driver to operate:
 +       */
 +      if (bios_fail) {
 +              pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
 +              pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
 +                            reg_fail, val_fail);
 +      }
 +
 +      return true;
 +
 +msr_fail:
 +      pr_cont("Broken PMU hardware detected, using software events only.\n");
 +      pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 +              boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
 +              reg, val_new);
 +
 +      return false;
 +}
 +
 +static void hw_perf_event_destroy(struct perf_event *event)
 +{
 +      x86_release_hardware();
 +      atomic_dec(&active_events);
 +}
 +
 +void hw_perf_lbr_event_destroy(struct perf_event *event)
 +{
 +      hw_perf_event_destroy(event);
 +
 +      /* undo the lbr/bts event accounting */
 +      x86_del_exclusive(x86_lbr_exclusive_lbr);
 +}
 +
 +static inline int x86_pmu_initialized(void)
 +{
 +      return x86_pmu.handle_irq != NULL;
 +}
 +
 +static inline int
 +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +      unsigned int cache_type, cache_op, cache_result;
 +      u64 config, val;
 +
 +      config = attr->config;
 +
 +      cache_type = (config >>  0) & 0xff;
 +      if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
 +              return -EINVAL;
 +
 +      cache_op = (config >>  8) & 0xff;
 +      if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
 +              return -EINVAL;
 +
 +      cache_result = (config >> 16) & 0xff;
 +      if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 +              return -EINVAL;
 +
 +      val = hw_cache_event_ids[cache_type][cache_op][cache_result];
 +
 +      if (val == 0)
 +              return -ENOENT;
 +
 +      if (val == -1)
 +              return -EINVAL;
 +
 +      hwc->config |= val;
 +      attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
 +      return x86_pmu_extra_regs(val, event);
 +}
 +
 +int x86_reserve_hardware(void)
 +{
 +      int err = 0;
 +
 +      if (!atomic_inc_not_zero(&pmc_refcount)) {
 +              mutex_lock(&pmc_reserve_mutex);
 +              if (atomic_read(&pmc_refcount) == 0) {
 +                      if (!reserve_pmc_hardware())
 +                              err = -EBUSY;
 +                      else
 +                              reserve_ds_buffers();
 +              }
 +              if (!err)
 +                      atomic_inc(&pmc_refcount);
 +              mutex_unlock(&pmc_reserve_mutex);
 +      }
 +
 +      return err;
 +}
 +
 +void x86_release_hardware(void)
 +{
 +      if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
 +              release_pmc_hardware();
 +              release_ds_buffers();
 +              mutex_unlock(&pmc_reserve_mutex);
 +      }
 +}
 +
 +/*
 + * Check if we can create event of a certain type (that no conflicting events
 + * are present).
 + */
 +int x86_add_exclusive(unsigned int what)
 +{
 +      int i;
 +
 +      if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
 +              mutex_lock(&pmc_reserve_mutex);
 +              for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
 +                      if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
 +                              goto fail_unlock;
 +              }
 +              atomic_inc(&x86_pmu.lbr_exclusive[what]);
 +              mutex_unlock(&pmc_reserve_mutex);
 +      }
 +
 +      atomic_inc(&active_events);
 +      return 0;
 +
 +fail_unlock:
 +      mutex_unlock(&pmc_reserve_mutex);
 +      return -EBUSY;
 +}
 +
 +void x86_del_exclusive(unsigned int what)
 +{
 +      atomic_dec(&x86_pmu.lbr_exclusive[what]);
 +      atomic_dec(&active_events);
 +}
 +
 +int x86_setup_perfctr(struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +      struct hw_perf_event *hwc = &event->hw;
 +      u64 config;
 +
 +      if (!is_sampling_event(event)) {
 +              hwc->sample_period = x86_pmu.max_period;
 +              hwc->last_period = hwc->sample_period;
 +              local64_set(&hwc->period_left, hwc->sample_period);
 +      }
 +
 +      if (attr->type == PERF_TYPE_RAW)
 +              return x86_pmu_extra_regs(event->attr.config, event);
 +
 +      if (attr->type == PERF_TYPE_HW_CACHE)
 +              return set_ext_hw_attr(hwc, event);
 +
 +      if (attr->config >= x86_pmu.max_events)
 +              return -EINVAL;
 +
 +      /*
 +       * The generic map:
 +       */
 +      config = x86_pmu.event_map(attr->config);
 +
 +      if (config == 0)
 +              return -ENOENT;
 +
 +      if (config == -1LL)
 +              return -EINVAL;
 +
 +      /*
 +       * Branch tracing:
 +       */
 +      if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
 +          !attr->freq && hwc->sample_period == 1) {
 +              /* BTS is not supported by this architecture. */
 +              if (!x86_pmu.bts_active)
 +                      return -EOPNOTSUPP;
 +
 +              /* BTS is currently only allowed for user-mode. */
 +              if (!attr->exclude_kernel)
 +                      return -EOPNOTSUPP;
 +
 +              /* disallow bts if conflicting events are present */
 +              if (x86_add_exclusive(x86_lbr_exclusive_lbr))
 +                      return -EBUSY;
 +
 +              event->destroy = hw_perf_lbr_event_destroy;
 +      }
 +
 +      hwc->config |= config;
 +
 +      return 0;
 +}
 +
 +/*
 + * check that branch_sample_type is compatible with
 + * settings needed for precise_ip > 1 which implies
 + * using the LBR to capture ALL taken branches at the
 + * priv levels of the measurement
 + */
 +static inline int precise_br_compat(struct perf_event *event)
 +{
 +      u64 m = event->attr.branch_sample_type;
 +      u64 b = 0;
 +
 +      /* must capture all branches */
 +      if (!(m & PERF_SAMPLE_BRANCH_ANY))
 +              return 0;
 +
 +      m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
 +
 +      if (!event->attr.exclude_user)
 +              b |= PERF_SAMPLE_BRANCH_USER;
 +
 +      if (!event->attr.exclude_kernel)
 +              b |= PERF_SAMPLE_BRANCH_KERNEL;
 +
 +      /*
 +       * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
 +       */
 +
 +      return m == b;
 +}
 +
 +int x86_pmu_hw_config(struct perf_event *event)
 +{
 +      if (event->attr.precise_ip) {
 +              int precise = 0;
 +
 +              /* Support for constant skid */
 +              if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
 +                      precise++;
 +
 +                      /* Support for IP fixup */
 +                      if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
 +                              precise++;
 +
 +                      if (x86_pmu.pebs_prec_dist)
 +                              precise++;
 +              }
 +
 +              if (event->attr.precise_ip > precise)
 +                      return -EOPNOTSUPP;
 +      }
 +      /*
 +       * check that PEBS LBR correction does not conflict with
 +       * whatever the user is asking with attr->branch_sample_type
 +       */
 +      if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
 +              u64 *br_type = &event->attr.branch_sample_type;
 +
 +              if (has_branch_stack(event)) {
 +                      if (!precise_br_compat(event))
 +                              return -EOPNOTSUPP;
 +
 +                      /* branch_sample_type is compatible */
 +
 +              } else {
 +                      /*
 +                       * user did not specify  branch_sample_type
 +                       *
 +                       * For PEBS fixups, we capture all
 +                       * the branches at the priv level of the
 +                       * event.
 +                       */
 +                      *br_type = PERF_SAMPLE_BRANCH_ANY;
 +
 +                      if (!event->attr.exclude_user)
 +                              *br_type |= PERF_SAMPLE_BRANCH_USER;
 +
 +                      if (!event->attr.exclude_kernel)
 +                              *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
 +              }
 +      }
 +
 +      if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
 +              event->attach_state |= PERF_ATTACH_TASK_DATA;
 +
 +      /*
 +       * Generate PMC IRQs:
 +       * (keep 'enabled' bit clear for now)
 +       */
 +      event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
 +
 +      /*
 +       * Count user and OS events unless requested not to
 +       */
 +      if (!event->attr.exclude_user)
 +              event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
 +      if (!event->attr.exclude_kernel)
 +              event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 +
 +      if (event->attr.type == PERF_TYPE_RAW)
 +              event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 +
 +      if (event->attr.sample_period && x86_pmu.limit_period) {
 +              if (x86_pmu.limit_period(event, event->attr.sample_period) >
 +                              event->attr.sample_period)
 +                      return -EINVAL;
 +      }
 +
 +      return x86_setup_perfctr(event);
 +}
 +
 +/*
 + * Setup the hardware configuration for a given attr_type
 + */
 +static int __x86_pmu_event_init(struct perf_event *event)
 +{
 +      int err;
 +
 +      if (!x86_pmu_initialized())
 +              return -ENODEV;
 +
 +      err = x86_reserve_hardware();
 +      if (err)
 +              return err;
 +
 +      atomic_inc(&active_events);
 +      event->destroy = hw_perf_event_destroy;
 +
 +      event->hw.idx = -1;
 +      event->hw.last_cpu = -1;
 +      event->hw.last_tag = ~0ULL;
 +
 +      /* mark unused */
 +      event->hw.extra_reg.idx = EXTRA_REG_NONE;
 +      event->hw.branch_reg.idx = EXTRA_REG_NONE;
 +
 +      return x86_pmu.hw_config(event);
 +}
 +
 +void x86_pmu_disable_all(void)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int idx;
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              u64 val;
 +
 +              if (!test_bit(idx, cpuc->active_mask))
 +                      continue;
 +              rdmsrl(x86_pmu_config_addr(idx), val);
 +              if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
 +                      continue;
 +              val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 +              wrmsrl(x86_pmu_config_addr(idx), val);
 +      }
 +}
 +
 +/*
 + * There may be PMI landing after enabled=0. The PMI hitting could be before or
 + * after disable_all.
 + *
 + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
 + * It will not be re-enabled in the NMI handler again, because enabled=0. After
 + * handling the NMI, disable_all will be called, which will not change the
 + * state either. If PMI hits after disable_all, the PMU is already disabled
 + * before entering NMI handler. The NMI handler will not change the state
 + * either.
 + *
 + * So either situation is harmless.
 + */
 +static void x86_pmu_disable(struct pmu *pmu)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      if (!x86_pmu_initialized())
 +              return;
 +
 +      if (!cpuc->enabled)
 +              return;
 +
 +      cpuc->n_added = 0;
 +      cpuc->enabled = 0;
 +      barrier();
 +
 +      x86_pmu.disable_all();
 +}
 +
 +void x86_pmu_enable_all(int added)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int idx;
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 +
 +              if (!test_bit(idx, cpuc->active_mask))
 +                      continue;
 +
 +              __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 +      }
 +}
 +
 +static struct pmu pmu;
 +
 +static inline int is_x86_event(struct perf_event *event)
 +{
 +      return event->pmu == &pmu;
 +}
 +
 +/*
 + * Event scheduler state:
 + *
 + * Assign events iterating over all events and counters, beginning
 + * with events with least weights first. Keep the current iterator
 + * state in struct sched_state.
 + */
 +struct sched_state {
 +      int     weight;
 +      int     event;          /* event index */
 +      int     counter;        /* counter index */
 +      int     unassigned;     /* number of events to be assigned left */
 +      int     nr_gp;          /* number of GP counters used */
 +      unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +};
 +
 +/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
 +#define       SCHED_STATES_MAX        2
 +
 +struct perf_sched {
 +      int                     max_weight;
 +      int                     max_events;
 +      int                     max_gp;
 +      int                     saved_states;
 +      struct event_constraint **constraints;
 +      struct sched_state      state;
 +      struct sched_state      saved[SCHED_STATES_MAX];
 +};
 +
 +/*
 + * Initialize iterator that runs through all events and counters.
 + */
 +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
 +                          int num, int wmin, int wmax, int gpmax)
 +{
 +      int idx;
 +
 +      memset(sched, 0, sizeof(*sched));
 +      sched->max_events       = num;
 +      sched->max_weight       = wmax;
 +      sched->max_gp           = gpmax;
 +      sched->constraints      = constraints;
 +
 +      for (idx = 0; idx < num; idx++) {
 +              if (constraints[idx]->weight == wmin)
 +                      break;
 +      }
 +
 +      sched->state.event      = idx;          /* start with min weight */
 +      sched->state.weight     = wmin;
 +      sched->state.unassigned = num;
 +}
 +
 +static void perf_sched_save_state(struct perf_sched *sched)
 +{
 +      if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
 +              return;
 +
 +      sched->saved[sched->saved_states] = sched->state;
 +      sched->saved_states++;
 +}
 +
 +static bool perf_sched_restore_state(struct perf_sched *sched)
 +{
 +      if (!sched->saved_states)
 +              return false;
 +
 +      sched->saved_states--;
 +      sched->state = sched->saved[sched->saved_states];
 +
 +      /* continue with next counter: */
 +      clear_bit(sched->state.counter++, sched->state.used);
 +
 +      return true;
 +}
 +
 +/*
 + * Select a counter for the current event to schedule. Return true on
 + * success.
 + */
 +static bool __perf_sched_find_counter(struct perf_sched *sched)
 +{
 +      struct event_constraint *c;
 +      int idx;
 +
 +      if (!sched->state.unassigned)
 +              return false;
 +
 +      if (sched->state.event >= sched->max_events)
 +              return false;
 +
 +      c = sched->constraints[sched->state.event];
 +      /* Prefer fixed purpose counters */
 +      if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 +              idx = INTEL_PMC_IDX_FIXED;
 +              for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 +                      if (!__test_and_set_bit(idx, sched->state.used))
 +                              goto done;
 +              }
 +      }
 +
 +      /* Grab the first unused counter starting with idx */
 +      idx = sched->state.counter;
 +      for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
 +              if (!__test_and_set_bit(idx, sched->state.used)) {
 +                      if (sched->state.nr_gp++ >= sched->max_gp)
 +                              return false;
 +
 +                      goto done;
 +              }
 +      }
 +
 +      return false;
 +
 +done:
 +      sched->state.counter = idx;
 +
 +      if (c->overlap)
 +              perf_sched_save_state(sched);
 +
 +      return true;
 +}
 +
 +static bool perf_sched_find_counter(struct perf_sched *sched)
 +{
 +      while (!__perf_sched_find_counter(sched)) {
 +              if (!perf_sched_restore_state(sched))
 +                      return false;
 +      }
 +
 +      return true;
 +}
 +
 +/*
 + * Go through all unassigned events and find the next one to schedule.
 + * Take events with the least weight first. Return true on success.
 + */
 +static bool perf_sched_next_event(struct perf_sched *sched)
 +{
 +      struct event_constraint *c;
 +
 +      if (!sched->state.unassigned || !--sched->state.unassigned)
 +              return false;
 +
 +      do {
 +              /* next event */
 +              sched->state.event++;
 +              if (sched->state.event >= sched->max_events) {
 +                      /* next weight */
 +                      sched->state.event = 0;
 +                      sched->state.weight++;
 +                      if (sched->state.weight > sched->max_weight)
 +                              return false;
 +              }
 +              c = sched->constraints[sched->state.event];
 +      } while (c->weight != sched->state.weight);
 +
 +      sched->state.counter = 0;       /* start with first counter */
 +
 +      return true;
 +}
 +
 +/*
 + * Assign a counter for each event.
 + */
 +int perf_assign_events(struct event_constraint **constraints, int n,
 +                      int wmin, int wmax, int gpmax, int *assign)
 +{
 +      struct perf_sched sched;
 +
 +      perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
 +
 +      do {
 +              if (!perf_sched_find_counter(&sched))
 +                      break;  /* failed */
 +              if (assign)
 +                      assign[sched.state.event] = sched.state.counter;
 +      } while (perf_sched_next_event(&sched));
 +
 +      return sched.state.unassigned;
 +}
 +EXPORT_SYMBOL_GPL(perf_assign_events);
 +
 +/*
 + * Compute a counter assignment for the first @n events in cpuc->event_list.
 + *
 + * The constraint of every event is fetched first. A fast path then tries
 + * to keep each event on the counter recorded in its hwc->idx; if that does
 + * not work for all @n events, the slow path passes the complete set to
 + * perf_assign_events().
 + *
 + * @assign receives the counter index per event and may be NULL, in which
 + * case nothing is stored and nothing is committed.
 + *
 + * Returns 0 on success and -EINVAL if some event could not be scheduled.
 + */
 +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 +{
 +      struct event_constraint *c;
 +      unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +      struct perf_event *e;
 +      int i, wmin, wmax, unsched = 0;
 +      struct hw_perf_event *hwc;
 +
 +      bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 +
 +      if (x86_pmu.start_scheduling)
 +              x86_pmu.start_scheduling(cpuc);
 +
 +      /* Collect the constraints and track the smallest/largest weight seen. */
 +      for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 +              cpuc->event_constraint[i] = NULL;
 +              c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
 +              cpuc->event_constraint[i] = c;
 +
 +              wmin = min(wmin, c->weight);
 +              wmax = max(wmax, c->weight);
 +      }
 +
 +      /*
 +       * fastpath, try to reuse previous register
 +       */
 +      for (i = 0; i < n; i++) {
 +              hwc = &cpuc->event_list[i]->hw;
 +              c = cpuc->event_constraint[i];
 +
 +              /* never assigned */
 +              if (hwc->idx == -1)
 +                      break;
 +
 +              /* constraint still honored */
 +              if (!test_bit(hwc->idx, c->idxmsk))
 +                      break;
 +
 +              /* not already used */
 +              if (test_bit(hwc->idx, used_mask))
 +                      break;
 +
 +              __set_bit(hwc->idx, used_mask);
 +              if (assign)
 +                      assign[i] = hwc->idx;
 +      }
 +
 +      /* slow path */
 +      if (i != n) {
 +              int gpmax = x86_pmu.num_counters;
 +
 +              /*
 +               * Do not allow scheduling of more than half the available
 +               * generic counters.
 +               *
 +               * This helps avoid counter starvation of sibling thread by
 +               * ensuring at most half the counters cannot be in exclusive
 +               * mode. There is no designated counters for the limits. Any
 +               * N/2 counters can be used. This helps with events with
 +               * specific counter constraints.
 +               */
 +              if (is_ht_workaround_enabled() && !cpuc->is_fake &&
 +                  READ_ONCE(cpuc->excl_cntrs->exclusive_present))
 +                      gpmax /= 2;
 +
 +              /* Reschedules all n events, not only those the fast path missed. */
 +              unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
 +                                           wmax, gpmax, assign);
 +      }
 +
 +      /*
 +       * In case of success (unsched = 0), mark events as committed,
 +       * so we do not put_constraint() in case new events are added
 +       * and fail to be scheduled
 +       *
 +       * We invoke the lower level commit callback to lock the resource
 +       *
 +       * We do not need to do all of this in case we are called to
 +       * validate an event group (assign == NULL)
 +       */
 +      if (!unsched && assign) {
 +              for (i = 0; i < n; i++) {
 +                      e = cpuc->event_list[i];
 +                      e->hw.flags |= PERF_X86_EVENT_COMMITTED;
 +                      if (x86_pmu.commit_scheduling)
 +                              x86_pmu.commit_scheduling(cpuc, i, assign[i]);
 +              }
 +      } else {
 +              for (i = 0; i < n; i++) {
 +                      e = cpuc->event_list[i];
 +                      /*
 +                       * do not put_constraint() on committed events,
 +                       * because they are good to go
 +                       */
 +                      if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
 +                              continue;
 +
 +                      /*
 +                       * release events that failed scheduling
 +                       */
 +                      if (x86_pmu.put_event_constraints)
 +                              x86_pmu.put_event_constraints(cpuc, e);
 +              }
 +      }
 +
 +      if (x86_pmu.stop_scheduling)
 +              x86_pmu.stop_scheduling(cpuc);
 +
 +      return unsched ? -EINVAL : 0;
 +}
 +
 +/*
 + * dogrp: true if must collect siblings events (group)
 + * returns total number of events and error code
 + */
 +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 +{
 +      struct perf_event *event;
 +      int n, max_count;
 +
 +      max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 +
 +      /* current number of events already accepted */
 +      n = cpuc->n_events;
 +
 +      if (is_x86_event(leader)) {
 +              if (n >= max_count)
 +                      return -EINVAL;
 +              cpuc->event_list[n] = leader;
 +              n++;
 +      }
 +      if (!dogrp)
 +              return n;
 +
 +      list_for_each_entry(event, &leader->sibling_list, group_entry) {
 +              if (!is_x86_event(event) ||
 +                  event->state <= PERF_EVENT_STATE_OFF)
 +                      continue;
 +
 +              if (n >= max_count)
 +                      return -EINVAL;
 +
 +              cpuc->event_list[n] = event;
 +              n++;
 +      }
 +      return n;
 +}
 +
 +static inline void x86_assign_hw_event(struct perf_event *event,
 +                              struct cpu_hw_events *cpuc, int i)
 +{
 +      struct hw_perf_event *hwc = &event->hw;
 +
 +      hwc->idx = cpuc->assign[i];
 +      hwc->last_cpu = smp_processor_id();
 +      hwc->last_tag = ++cpuc->tags[i];
 +
 +      if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
 +              hwc->config_base = 0;
 +              hwc->event_base = 0;
 +      } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
 +              hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 +              hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
 +              hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
 +      } else {
 +              hwc->config_base = x86_pmu_config_addr(hwc->idx);
 +              hwc->event_base  = x86_pmu_event_addr(hwc->idx);
 +              hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
 +      }
 +}
 +
 +static inline int match_prev_assignment(struct hw_perf_event *hwc,
 +                                      struct cpu_hw_events *cpuc,
 +                                      int i)
 +{
 +      return hwc->idx == cpuc->assign[i] &&
 +              hwc->last_cpu == smp_processor_id() &&
 +              hwc->last_tag == cpuc->tags[i];
 +}
 +
 +static void x86_pmu_start(struct perf_event *event, int flags);
 +
 +/*
 + * Enable the PMU on the current CPU.
 + *
 + * Does nothing if the x86 PMU is not initialized or cpuc->enabled is
 + * already set. If events were added since the last enable
 + * (cpuc->n_added != 0), the assignment in cpuc->assign is applied first:
 + * previously running events whose counter changed are stopped, then all
 + * moved or newly added events are programmed and started. Finally
 + * cpuc->enabled is set and x86_pmu.enable_all() is called.
 + */
 +static void x86_pmu_enable(struct pmu *pmu)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct perf_event *event;
 +      struct hw_perf_event *hwc;
 +      /* n_added is sampled here because it is reset to 0 further down */
 +      int i, added = cpuc->n_added;
 +
 +      if (!x86_pmu_initialized())
 +              return;
 +
 +      if (cpuc->enabled)
 +              return;
 +
 +      if (cpuc->n_added) {
 +              /* events [0, n_running) were present before the additions */
 +              int n_running = cpuc->n_events - cpuc->n_added;
 +              /*
 +               * apply assignment obtained either from
 +               * hw_perf_group_sched_in() or x86_pmu_enable()
 +               *
 +               * step1: save events moving to new counters
 +               */
 +              for (i = 0; i < n_running; i++) {
 +                      event = cpuc->event_list[i];
 +                      hwc = &event->hw;
 +
 +                      /*
 +                       * we can avoid reprogramming counter if:
 +                       * - assigned same counter as last time
 +                       * - running on same CPU as last time
 +                       * - no other event has used the counter since
 +                       */
 +                      if (hwc->idx == -1 ||
 +                          match_prev_assignment(hwc, cpuc, i))
 +                              continue;
 +
 +                      /*
 +                       * Ensure we don't accidentally enable a stopped
 +                       * counter simply because we rescheduled.
 +                       */
 +                      if (hwc->state & PERF_HES_STOPPED)
 +                              hwc->state |= PERF_HES_ARCH;
 +
 +                      x86_pmu_stop(event, PERF_EF_UPDATE);
 +              }
 +
 +              /*
 +               * step2: reprogram moved events into new counters
 +               */
 +              for (i = 0; i < cpuc->n_events; i++) {
 +                      event = cpuc->event_list[i];
 +                      hwc = &event->hw;
 +
 +                      if (!match_prev_assignment(hwc, cpuc, i))
 +                              x86_assign_hw_event(event, cpuc, i);
 +                      else if (i < n_running)
 +                              continue;
 +
 +                      /* PERF_HES_ARCH set: the event must stay stopped */
 +                      if (hwc->state & PERF_HES_ARCH)
 +                              continue;
 +
 +                      x86_pmu_start(event, PERF_EF_RELOAD);
 +              }
 +              cpuc->n_added = 0;
 +              perf_events_lapic_init();
 +      }
 +
 +      cpuc->enabled = 1;
 +      barrier();
 +
 +      x86_pmu.enable_all(added);
 +}
 +
 +/*
 + * Per-cpu record, indexed by counter, of the last 'left' value computed by
 + * x86_perf_event_set_period(); printed by perf_event_print_debug().
 + */
 +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 +
 +/*
 + * Set the next IRQ period, based on the hwc->period_left value.
 + * To be called with the event disabled in hw:
 + *
 + * Returns 1 if period_left had run out and a new sample period was
 + * started, 0 otherwise (always 0 for the INTEL_PMC_IDX_FIXED_BTS index,
 + * for which nothing is programmed).
 + */
 +int x86_perf_event_set_period(struct perf_event *event)
 +{
 +      struct hw_perf_event *hwc = &event->hw;
 +      s64 left = local64_read(&hwc->period_left);
 +      s64 period = hwc->sample_period;
 +      int ret = 0, idx = hwc->idx;
 +
 +      if (idx == INTEL_PMC_IDX_FIXED_BTS)
 +              return 0;
 +
 +      /*
 +       * If we are way outside a reasonable range then just skip forward:
 +       */
 +      if (unlikely(left <= -period)) {
 +              left = period;
 +              local64_set(&hwc->period_left, left);
 +              hwc->last_period = period;
 +              ret = 1;
 +      }
 +
 +      /* Period elapsed (possibly overshot): carry the overshoot over. */
 +      if (unlikely(left <= 0)) {
 +              left += period;
 +              local64_set(&hwc->period_left, left);
 +              hwc->last_period = period;
 +              ret = 1;
 +      }
 +      /*
 +       * Quirk: certain CPUs don't like it if just 1 hw_event is left:
 +       */
 +      if (unlikely(left < 2))
 +              left = 2;
 +
 +      if (left > x86_pmu.max_period)
 +              left = x86_pmu.max_period;
 +
 +      if (x86_pmu.limit_period)
 +              left = x86_pmu.limit_period(event, left);
 +
 +      /* Remember the value for perf_event_print_debug(). */
 +      per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 +
 +      /*
 +       * With PERF_X86_EVENT_AUTO_RELOAD the write is skipped when
 +       * prev_count already holds -left.
 +       */
 +      if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
 +          local64_read(&hwc->prev_count) != (u64)-left) {
 +              /*
 +               * The hw event starts counting from this event offset,
 +               * mark it to be able to extract future deltas:
 +               */
 +              local64_set(&hwc->prev_count, (u64)-left);
 +
 +              wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
 +      }
 +
 +      /*
 +       * Due to an erratum on certain CPUs we need
 +       * a second write to be sure the register
 +       * is updated properly
 +       */
 +      if (x86_pmu.perfctr_second_write) {
 +              wrmsrl(hwc->event_base,
 +                      (u64)(-left) & x86_pmu.cntval_mask);
 +      }
 +
 +      perf_event_update_userpage(event);
 +
 +      return ret;
 +}
 +
 +void x86_pmu_enable_event(struct perf_event *event)
 +{
 +      if (__this_cpu_read(cpu_hw_events.enabled))
 +              __x86_pmu_enable_event(&event->hw,
 +                                     ARCH_PERFMON_EVENTSEL_ENABLE);
 +}
 +
 +/*
 + * Add a single event to the PMU.
 + *
 + * The event is added to the group of enabled events
 + * but only if it can be scehduled with existing events.
 + */
 +static int x86_pmu_add(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct hw_perf_event *hwc;
 +      int assign[X86_PMC_IDX_MAX];
 +      int n, n0, ret;
 +
 +      hwc = &event->hw;
 +
 +      n0 = cpuc->n_events;
 +      ret = n = collect_events(cpuc, event, false);
 +      if (ret < 0)
 +              goto out;
 +
 +      hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 +      if (!(flags & PERF_EF_START))
 +              hwc->state |= PERF_HES_ARCH;
 +
 +      /*
 +       * If group events scheduling transaction was started,
 +       * skip the schedulability test here, it will be performed
 +       * at commit time (->commit_txn) as a whole.
 +       */
 +      if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 +              goto done_collect;
 +
 +      ret = x86_pmu.schedule_events(cpuc, n, assign);
 +      if (ret)
 +              goto out;
 +      /*
 +       * copy new assignment, now we know it is possible
 +       * will be used by hw_perf_enable()
 +       */
 +      memcpy(cpuc->assign, assign, n*sizeof(int));
 +
 +done_collect:
 +      /*
 +       * Commit the collect_events() state. See x86_pmu_del() and
 +       * x86_pmu_*_txn().
 +       */
 +      cpuc->n_events = n;
 +      cpuc->n_added += n - n0;
 +      cpuc->n_txn += n - n0;
 +
 +      ret = 0;
 +out:
 +      return ret;
 +}
 +
 +static void x86_pmu_start(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int idx = event->hw.idx;
 +
 +      if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 +              return;
 +
 +      if (WARN_ON_ONCE(idx == -1))
 +              return;
 +
 +      if (flags & PERF_EF_RELOAD) {
 +              WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 +              x86_perf_event_set_period(event);
 +      }
 +
 +      event->hw.state = 0;
 +
 +      cpuc->events[idx] = event;
 +      __set_bit(idx, cpuc->active_mask);
 +      __set_bit(idx, cpuc->running);
 +      x86_pmu.enable(event);
 +      perf_event_update_userpage(event);
 +}
 +
 +/*
 + * Dump the PMU register state of the current CPU to the kernel log with
 + * pr_info(). Returns immediately when x86_pmu.num_counters is 0. Local
 + * interrupts are disabled for the duration of the dump.
 + */
 +void perf_event_print_debug(void)
 +{
 +      u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
 +      u64 pebs, debugctl;
 +      struct cpu_hw_events *cpuc;
 +      unsigned long flags;
 +      int cpu, idx;
 +
 +      if (!x86_pmu.num_counters)
 +              return;
 +
 +      local_irq_save(flags);
 +
 +      cpu = smp_processor_id();
 +      cpuc = &per_cpu(cpu_hw_events, cpu);
 +
 +      /* The global control/status MSRs are only read for version >= 2. */
 +      if (x86_pmu.version >= 2) {
 +              rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 +              rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 +              rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 +              rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
 +
 +              pr_info("\n");
 +              pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 +              pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 +              pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 +              pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 +              if (x86_pmu.pebs_constraints) {
 +                      rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 +                      pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
 +              }
 +              if (x86_pmu.lbr_nr) {
 +                      rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 +                      pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
 +              }
 +      }
 +      /* Prints the first 64 bits of the active counter bitmap. */
 +      pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 +
 +      /* Generic counters: control register, count, last programmed 'left'. */
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
 +              rdmsrl(x86_pmu_event_addr(idx), pmc_count);
 +
 +              prev_left = per_cpu(pmc_prev_left[idx], cpu);
 +
 +              pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
 +                      cpu, idx, pmc_ctrl);
 +              pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
 +                      cpu, idx, pmc_count);
 +              pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 +                      cpu, idx, prev_left);
 +      }
 +      /* Fixed counters: count only. */
 +      for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 +              rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 +
 +              pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
 +                      cpu, idx, pmc_count);
 +      }
 +      local_irq_restore(flags);
 +}
 +
 +/*
 + * Stop an event.
 + *
 + * If the event's counter is marked active on this CPU, it is disabled via
 + * x86_pmu.disable(), its per-cpu event slot is cleared and the event is
 + * marked PERF_HES_STOPPED. With PERF_EF_UPDATE in @flags the count is
 + * additionally brought up to date, unless PERF_HES_UPTODATE is already
 + * set.
 + */
 +void x86_pmu_stop(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct hw_perf_event *hwc = &event->hw;
 +
 +      /* Clearing the active bit doubles as the "was it running" test. */
 +      if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
 +              x86_pmu.disable(event);
 +              cpuc->events[hwc->idx] = NULL;
 +              WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 +              hwc->state |= PERF_HES_STOPPED;
 +      }
 +
 +      if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 +              /*
 +               * Drain the remaining delta count out of an event
 +               * that we are disabling:
 +               */
 +              x86_perf_event_update(event);
 +              hwc->state |= PERF_HES_UPTODATE;
 +      }
 +}
 +
 +/*
 + * Remove an event from the PMU of the current CPU.
 + *
 + * The PERF_X86_EVENT_COMMITTED flag is always cleared. Outside of an ADD
 + * transaction the event is stopped, its constraint is released and its
 + * entry is deleted from cpuc->event_list / cpuc->event_constraint, with
 + * n_events (and n_added if applicable) adjusted.
 + */
 +static void x86_pmu_del(struct perf_event *event, int flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int i;
 +
 +      /*
 +       * event is descheduled
 +       */
 +      event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 +
 +      /*
 +       * If we're called during a txn, we don't need to do anything.
 +       * The events never got scheduled and ->cancel_txn will truncate
 +       * the event_list.
 +       *
 +       * XXX assumes any ->del() called during a TXN will only be on
 +       * an event added during that same TXN.
 +       */
 +      if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 +              return;
 +
 +      /*
 +       * Not a TXN, therefore cleanup properly.
 +       */
 +      x86_pmu_stop(event, PERF_EF_UPDATE);
 +
 +      /* Locate the event in the list; i == n_events means "not found". */
 +      for (i = 0; i < cpuc->n_events; i++) {
 +              if (event == cpuc->event_list[i])
 +                      break;
 +      }
 +
 +      if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
 +              return;
 +
 +      /* If we have a newly added event; make sure to decrease n_added. */
 +      if (i >= cpuc->n_events - cpuc->n_added)
 +              --cpuc->n_added;
 +
 +      if (x86_pmu.put_event_constraints)
 +              x86_pmu.put_event_constraints(cpuc, event);
 +
 +      /* Delete the array entry. */
 +      while (++i < cpuc->n_events) {
 +              cpuc->event_list[i-1] = cpuc->event_list[i];
 +              cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
 +      }
 +      --cpuc->n_events;
 +
 +      perf_event_update_userpage(event);
 +}
 +
 +/*
 + * Generic PMU interrupt handler.
 + *
 + * Walks the generic counters of the current CPU, updates every active
 + * event and processes those that have overflowed. Returns the number of
 + * counters for which the interrupt was accounted as handled.
 + */
 +int x86_pmu_handle_irq(struct pt_regs *regs)
 +{
 +      struct perf_sample_data data;
 +      struct cpu_hw_events *cpuc;
 +      struct perf_event *event;
 +      int idx, handled = 0;
 +      u64 val;
 +
 +      cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      /*
 +       * Some chipsets need to unmask the LVTPC in a particular spot
 +       * inside the nmi handler.  As a result, the unmasking was pushed
 +       * into all the nmi handlers.
 +       *
 +       * This generic handler doesn't seem to have any issues where the
 +       * unmasking occurs so it was left at the top.
 +       */
 +      apic_write(APIC_LVTPC, APIC_DM_NMI);
 +
 +      for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 +              if (!test_bit(idx, cpuc->active_mask)) {
 +                      /*
 +                       * Though we deactivated the counter some cpus
 +                       * might still deliver spurious interrupts still
 +                       * in flight. Catch them:
 +                       */
 +                      if (__test_and_clear_bit(idx, cpuc->running))
 +                              handled++;
 +                      continue;
 +              }
 +
 +              event = cpuc->events[idx];
 +
 +              /*
 +               * Top counter bit still set: treated as "not overflowed".
 +               * (x86_perf_event_set_period() programs counters with -left.)
 +               */
 +              val = x86_perf_event_update(event);
 +              if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 +                      continue;
 +
 +              /*
 +               * event overflow
 +               */
 +              handled++;
 +              perf_sample_data_init(&data, 0, event->hw.last_period);
 +
 +              /* No new period was started: nothing to report. */
 +              if (!x86_perf_event_set_period(event))
 +                      continue;
 +
 +              /* A non-zero result from the overflow core stops the event. */
 +              if (perf_event_overflow(event, &data, regs))
 +                      x86_pmu_stop(event, 0);
 +      }
 +
 +      if (handled)
 +              inc_irq_stat(apic_perf_irqs);
 +
 +      return handled;
 +}
 +
 +void perf_events_lapic_init(void)
 +{
 +      if (!x86_pmu.apic || !x86_pmu_initialized())
 +              return;
 +
 +      /*
 +       * Always use NMI for PMU
 +       */
 +      apic_write(APIC_LVTPC, APIC_DM_NMI);
 +}
 +
 +static int
 +perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 +{
 +      u64 start_clock;
 +      u64 finish_clock;
 +      int ret;
 +
 +      /*
 +       * All PMUs/events that share this PMI handler should make sure to
 +       * increment active_events for their events.
 +       */
 +      if (!atomic_read(&active_events))
 +              return NMI_DONE;
 +
 +      start_clock = sched_clock();
 +      ret = x86_pmu.handle_irq(regs);
 +      finish_clock = sched_clock();
 +
 +      perf_sample_event_took(finish_clock - start_clock);
 +
 +      return ret;
 +}
 +NOKPROBE_SYMBOL(perf_event_nmi_handler);
 +
 +/*
 + * Not initialized explicitly here, i.e. all zero.
 + * NOTE(review): presumably stands for "no counter allowed" - confirm
 + * against its users.
 + */
 +struct event_constraint emptyconstraint;
 +/* Set up in init_hw_perf_events() with a mask covering all generic counters. */
 +struct event_constraint unconstrained;
 +
 +static int
 +x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 +{
 +      unsigned int cpu = (long)hcpu;
 +      struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 +      int i, ret = NOTIFY_OK;
 +
 +      switch (action & ~CPU_TASKS_FROZEN) {
 +      case CPU_UP_PREPARE:
 +              for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
 +                      cpuc->kfree_on_online[i] = NULL;
 +              if (x86_pmu.cpu_prepare)
 +                      ret = x86_pmu.cpu_prepare(cpu);
 +              break;
 +
 +      case CPU_STARTING:
 +              if (x86_pmu.cpu_starting)
 +                      x86_pmu.cpu_starting(cpu);
 +              break;
 +
 +      case CPU_ONLINE:
 +              for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
 +                      kfree(cpuc->kfree_on_online[i]);
 +                      cpuc->kfree_on_online[i] = NULL;
 +              }
 +              break;
 +
 +      case CPU_DYING:
 +              if (x86_pmu.cpu_dying)
 +                      x86_pmu.cpu_dying(cpu);
 +              break;
 +
 +      case CPU_UP_CANCELED:
 +      case CPU_DEAD:
 +              if (x86_pmu.cpu_dead)
 +                      x86_pmu.cpu_dead(cpu);
 +              break;
 +
 +      default:
 +              break;
 +      }
 +
 +      return ret;
 +}
 +
 +static void __init pmu_check_apic(void)
 +{
 +      if (cpu_has_apic)
 +              return;
 +
 +      x86_pmu.apic = 0;
 +      pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
 +      pr_info("no hardware sampling interrupt available.\n");
 +
 +      /*
 +       * If we have a PMU initialized but no APIC
 +       * interrupts, we cannot sample hardware
 +       * events (user-space has to fall back and
 +       * sample via a hrtimer based software event):
 +       */
 +      pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 +
 +}
 +
 +/* Attribute group "format"; .attrs is filled in by init_hw_perf_events(). */
 +static struct attribute_group x86_pmu_format_group = {
 +      .name = "format",
 +      .attrs = NULL,
 +};
 +
 +/*
 + * Remove all undefined events (x86_pmu.event_map(id) == 0)
 + * out of events_attr attributes.
 + *
 + * @attrs is a NULL-terminated array that is compacted in place. Entries
 + * that carry an event string are always kept.
 + */
 +static void __init filter_events(struct attribute **attrs)
 +{
 +      struct device_attribute *d;
 +      struct perf_pmu_events_attr *pmu_attr;
 +      /* number of entries removed so far */
 +      int offset = 0;
 +      int i, j;
 +
 +      for (i = 0; attrs[i]; i++) {
 +              d = (struct device_attribute *)attrs[i];
 +              pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
 +              /* str trumps id */
 +              if (pmu_attr->event_str)
 +                      continue;
 +              /* i + offset is the position the entry had before any removal */
 +              if (x86_pmu.event_map(i + offset))
 +                      continue;
 +
 +              /* Close the gap; the terminating NULL is moved along as well. */
 +              for (j = i; attrs[j]; j++)
 +                      attrs[j] = attrs[j + 1];
 +
 +              /* Check the shifted attr. */
 +              i--;
 +
 +              /*
 +               * event_map() is index based, the attrs array is organized
 +               * by increasing event index. If we shift the events, then
 +               * we need to compensate for the event_map(), otherwise
 +               * we are looking up the wrong event in the map
 +               */
 +              offset++;
 +      }
 +}
 +
 +/* Merge two pointer arrays */
 +__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
 +{
 +      struct attribute **new;
 +      int j, i;
 +
 +      for (j = 0; a[j]; j++)
 +              ;
 +      for (i = 0; b[i]; i++)
 +              j++;
 +      j++;
 +
 +      new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
 +      if (!new)
 +              return NULL;
 +
 +      j = 0;
 +      for (i = 0; a[i]; i++)
 +              new[j++] = a[i];
 +      for (i = 0; b[i]; i++)
 +              new[j++] = b[i];
 +      new[j] = NULL;
 +
 +      return new;
 +}
 +
 +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 +                        char *page)
 +{
 +      struct perf_pmu_events_attr *pmu_attr = \
 +              container_of(attr, struct perf_pmu_events_attr, attr);
 +      u64 config = x86_pmu.event_map(pmu_attr->id);
 +
 +      /* string trumps id */
 +      if (pmu_attr->event_str)
 +              return sprintf(page, "%s", pmu_attr->event_str);
 +
 +      return x86_pmu.events_sysfs_show(page, config);
 +}
 +
 +/*
 + * One attribute per generic hardware event; each of them is referenced
 + * through EVENT_PTR() in events_attr[].
 + * NOTE(review): EVENT_ATTR() is defined outside this file section - the
 + * exact expansion is not visible here.
 + */
 +EVENT_ATTR(cpu-cycles,                        CPU_CYCLES              );
 +EVENT_ATTR(instructions,              INSTRUCTIONS            );
 +EVENT_ATTR(cache-references,          CACHE_REFERENCES        );
 +EVENT_ATTR(cache-misses,              CACHE_MISSES            );
 +EVENT_ATTR(branch-instructions,               BRANCH_INSTRUCTIONS     );
 +EVENT_ATTR(branch-misses,             BRANCH_MISSES           );
 +EVENT_ATTR(bus-cycles,                        BUS_CYCLES              );
 +EVENT_ATTR(stalled-cycles-frontend,   STALLED_CYCLES_FRONTEND );
 +EVENT_ATTR(stalled-cycles-backend,    STALLED_CYCLES_BACKEND  );
 +EVENT_ATTR(ref-cycles,                        REF_CPU_CYCLES          );
 +
 +/*
 + * A single NULL pointer; &empty_attrs serves as an empty attribute list in
 + * init_hw_perf_events() when x86_pmu.events_sysfs_show is not set.
 + */
 +static struct attribute *empty_attrs;
 +
 +/*
 + * Default attribute list of the "events" group, NULL-terminated.
 + * filter_events() looks entries up by their position via
 + * x86_pmu.event_map(), so the order here matters.
 + */
 +static struct attribute *events_attr[] = {
 +      EVENT_PTR(CPU_CYCLES),
 +      EVENT_PTR(INSTRUCTIONS),
 +      EVENT_PTR(CACHE_REFERENCES),
 +      EVENT_PTR(CACHE_MISSES),
 +      EVENT_PTR(BRANCH_INSTRUCTIONS),
 +      EVENT_PTR(BRANCH_MISSES),
 +      EVENT_PTR(BUS_CYCLES),
 +      EVENT_PTR(STALLED_CYCLES_FRONTEND),
 +      EVENT_PTR(STALLED_CYCLES_BACKEND),
 +      EVENT_PTR(REF_CPU_CYCLES),
 +      NULL,
 +};
 +
 +/*
 + * Attribute group "events". Starts out with events_attr[];
 + * init_hw_perf_events() may point .attrs at a different list.
 + */
 +static struct attribute_group x86_pmu_events_group = {
 +      .name = "events",
 +      .attrs = events_attr,
 +};
 +
 +ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 +{
 +      u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 +      u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
 +      bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
 +      bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
 +      bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
 +      bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
 +      ssize_t ret;
 +
 +      /*
 +      * We have whole page size to spend and just little data
 +      * to write, so we can safely use sprintf.
 +      */
 +      ret = sprintf(page, "event=0x%02llx", event);
 +
 +      if (umask)
 +              ret += sprintf(page + ret, ",umask=0x%02llx", umask);
 +
 +      if (edge)
 +              ret += sprintf(page + ret, ",edge");
 +
 +      if (pc)
 +              ret += sprintf(page + ret, ",pc");
 +
 +      if (any)
 +              ret += sprintf(page + ret, ",any");
 +
 +      if (inv)
 +              ret += sprintf(page + ret, ",inv");
 +
 +      if (cmask)
 +              ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
 +
 +      ret += sprintf(page + ret, "\n");
 +
 +      return ret;
 +}
 +
 +static int __init init_hw_perf_events(void)
 +{
 +      struct x86_pmu_quirk *quirk;
 +      int err;
 +
 +      pr_info("Performance Events: ");
 +
 +      switch (boot_cpu_data.x86_vendor) {
 +      case X86_VENDOR_INTEL:
 +              err = intel_pmu_init();
 +              break;
 +      case X86_VENDOR_AMD:
 +              err = amd_pmu_init();
 +              break;
 +      default:
 +              err = -ENOTSUPP;
 +      }
 +      if (err != 0) {
 +              pr_cont("no PMU driver, software events only.\n");
 +              return 0;
 +      }
 +
 +      pmu_check_apic();
 +
 +      /* sanity check that the hardware exists or is emulated */
 +      if (!check_hw_exists())
 +              return 0;
 +
 +      pr_cont("%s PMU driver.\n", x86_pmu.name);
 +
 +      x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 +
 +      for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
 +              quirk->func();
 +
 +      if (!x86_pmu.intel_ctrl)
 +              x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
 +
 +      perf_events_lapic_init();
 +      register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 +
 +      unconstrained = (struct event_constraint)
 +              __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
 +                                 0, x86_pmu.num_counters, 0, 0);
 +
 +      x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 +
 +      if (x86_pmu.event_attrs)
 +              x86_pmu_events_group.attrs = x86_pmu.event_attrs;
 +
 +      if (!x86_pmu.events_sysfs_show)
 +              x86_pmu_events_group.attrs = &empty_attrs;
 +      else
 +              filter_events(x86_pmu_events_group.attrs);
 +
 +      if (x86_pmu.cpu_events) {
 +              struct attribute **tmp;
 +
 +              tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
 +              if (!WARN_ON(!tmp))
 +                      x86_pmu_events_group.attrs = tmp;
 +      }
 +
 +      pr_info("... version:                %d\n",     x86_pmu.version);
 +      pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 +      pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
 +      pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
 +      pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 +      pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 +      pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 +
 +      perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 +      perf_cpu_notifier(x86_pmu_notifier);
 +
 +      return 0;
 +}
 +early_initcall(init_hw_perf_events);
 +
 +/*
 + * Installed as the ->read callback of the x86 "pmu" instance; it simply
 + * delegates to x86_perf_event_update() for the given event.
 + */
 +static inline void x86_pmu_read(struct perf_event *event)
 +{
 +      x86_perf_event_update(event);
 +}
 +
 +/*
 + * Start group events scheduling transaction
 + * Set the flag to make pmu::enable() not perform the
 + * schedulability test, it will be performed at commit time
 + *
 + * We only support PERF_PMU_TXN_ADD transactions. Save the
 + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
 + * transactions.
 + */
 +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      WARN_ON_ONCE(cpuc->txn_flags);          /* txn already in flight */
 +
 +      cpuc->txn_flags = txn_flags;
 +      if (txn_flags & ~PERF_PMU_TXN_ADD)
 +              return;
 +
 +      perf_pmu_disable(pmu);
 +      __this_cpu_write(cpu_hw_events.n_txn, 0);
 +}
 +
 +/*
 + * Stop group events scheduling transaction
 + * Clear the flag and pmu::enable() will perform the
 + * schedulability test.
 + */
 +static void x86_pmu_cancel_txn(struct pmu *pmu)
 +{
 +      unsigned int txn_flags;
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +
 +      WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
 +
 +      txn_flags = cpuc->txn_flags;
 +      cpuc->txn_flags = 0;
 +      if (txn_flags & ~PERF_PMU_TXN_ADD)
 +              return;
 +
 +      /*
 +       * Truncate collected array by the number of events added in this
 +       * transaction. See x86_pmu_add() and x86_pmu_*_txn().
 +       */
 +      __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
 +      __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
 +      perf_pmu_enable(pmu);
 +}
 +
 +/*
 + * Commit group events scheduling transaction
 + * Perform the group schedulability test as a whole
 + * Return 0 if success
 + *
 + * Does not cancel the transaction on failure; expects the caller to do this.
 + */
 +static int x86_pmu_commit_txn(struct pmu *pmu)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      int assign[X86_PMC_IDX_MAX];
 +      int n, ret;
 +
 +      WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
 +
 +      if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
 +              cpuc->txn_flags = 0;
 +              return 0;
 +      }
 +
 +      n = cpuc->n_events;
 +
 +      if (!x86_pmu_initialized())
 +              return -EAGAIN;
 +
 +      ret = x86_pmu.schedule_events(cpuc, n, assign);
 +      if (ret)
 +              return ret;
 +
 +      /*
 +       * copy new assignment, now we know it is possible
 +       * will be used by hw_perf_enable()
 +       */
 +      memcpy(cpuc->assign, assign, n*sizeof(int));
 +
 +      cpuc->txn_flags = 0;
 +      perf_pmu_enable(pmu);
 +      return 0;
 +}
 +/*
 + * A fake_cpuc is used to validate event groups. Because of the
 + * extra-register logic, a fake per_core and per_cpu structure has to be
 + * allocated along with it. Otherwise, group events using an extra
 + * register could conflict without the kernel being able to catch this
 + * when the last event gets added to the group.
 + *
 + * free_fake_cpuc() releases such a structure: first its shared_regs
 + * member, then the structure itself.
 + */
 +static void free_fake_cpuc(struct cpu_hw_events *cpuc)
 +{
 +      kfree(cpuc->shared_regs);
 +      kfree(cpuc);
 +}
 +
 +static struct cpu_hw_events *allocate_fake_cpuc(void)
 +{
 +      struct cpu_hw_events *cpuc;
 +      int cpu = raw_smp_processor_id();
 +
 +      cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
 +      if (!cpuc)
 +              return ERR_PTR(-ENOMEM);
 +
 +      /* only needed, if we have extra_regs */
 +      if (x86_pmu.extra_regs) {
 +              cpuc->shared_regs = allocate_shared_regs(cpu);
 +              if (!cpuc->shared_regs)
 +                      goto error;
 +      }
 +      cpuc->is_fake = 1;
 +      return cpuc;
 +error:
 +      free_fake_cpuc(cpuc);
 +      return ERR_PTR(-ENOMEM);
 +}
 +
 +/*
 + * validate that we can schedule this event
 + */
 +static int validate_event(struct perf_event *event)
 +{
 +      struct cpu_hw_events *fake_cpuc;
 +      struct event_constraint *c;
 +      int ret = 0;
 +
 +      fake_cpuc = allocate_fake_cpuc();
 +      if (IS_ERR(fake_cpuc))
 +              return PTR_ERR(fake_cpuc);
 +
 +      c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
 +
 +      if (!c || !c->weight)
 +              ret = -EINVAL;
 +
 +      if (x86_pmu.put_event_constraints)
 +              x86_pmu.put_event_constraints(fake_cpuc, event);
 +
 +      free_fake_cpuc(fake_cpuc);
 +
 +      return ret;
 +}
 +
 +/*
 + * validate a single event group
 + *
 + * validation include:
 + *    - check events are compatible which each other
 + *    - events do not compete for the same counter
 + *    - number of events <= number of counters
 + *
 + * validation ensures the group can be loaded onto the
 + * PMU if it was the only group available.
 + */
 +static int validate_group(struct perf_event *event)
 +{
 +      struct perf_event *leader = event->group_leader;
 +      struct cpu_hw_events *fake_cpuc;
 +      int ret = -EINVAL, n;
 +
 +      fake_cpuc = allocate_fake_cpuc();
 +      if (IS_ERR(fake_cpuc))
 +              return PTR_ERR(fake_cpuc);
 +      /*
 +       * the event is not yet connected with its
 +       * siblings therefore we must first collect
 +       * existing siblings, then add the new event
 +       * before we can simulate the scheduling
 +       */
 +      n = collect_events(fake_cpuc, leader, true);
 +      if (n < 0)
 +              goto out;
 +
 +      fake_cpuc->n_events = n;
 +      n = collect_events(fake_cpuc, event, false);
 +      if (n < 0)
 +              goto out;
 +
 +      fake_cpuc->n_events = n;
 +
 +      ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 +
 +out:
 +      free_fake_cpuc(fake_cpuc);
 +      return ret;
 +}
 +
 +static int x86_pmu_event_init(struct perf_event *event)
 +{
 +      struct pmu *tmp;
 +      int err;
 +
 +      switch (event->attr.type) {
 +      case PERF_TYPE_RAW:
 +      case PERF_TYPE_HARDWARE:
 +      case PERF_TYPE_HW_CACHE:
 +              break;
 +
 +      default:
 +              return -ENOENT;
 +      }
 +
 +      err = __x86_pmu_event_init(event);
 +      if (!err) {
 +              /*
 +               * we temporarily connect event to its pmu
 +               * such that validate_group() can classify
 +               * it as an x86 event using is_x86_event()
 +               */
 +              tmp = event->pmu;
 +              event->pmu = &pmu;
 +
 +              if (event->group_leader != event)
 +                      err = validate_group(event);
 +              else
 +                      err = validate_event(event);
 +
 +              event->pmu = tmp;
 +      }
 +      if (err) {
 +              if (event->destroy)
 +                      event->destroy(event);
 +      }
 +
 +      if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
 +              event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
 +
 +      return err;
 +}
 +
 +/*
 + * Cross-CPU callback (passed to on_each_cpu()/on_each_cpu_mask() by the
 + * rdpmc code in this file): if the current task has an mm, call
 + * load_mm_cr4() on it.  The argument is unused.
 + */
 +static void refresh_pce(void *ignored)
 +{
 +      if (current->mm)
 +              load_mm_cr4(current->mm);
 +}
 +
 +static void x86_pmu_event_mapped(struct perf_event *event)
 +{
 +      if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 +              return;
 +
 +      if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
 +              on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 +}
 +
 +static void x86_pmu_event_unmapped(struct perf_event *event)
 +{
 +      if (!current->mm)
 +              return;
 +
 +      if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 +              return;
 +
 +      if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
 +              on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
 +}
 +
 +static int x86_pmu_event_idx(struct perf_event *event)
 +{
 +      int idx = event->hw.idx;
 +
 +      if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 +              return 0;
 +
 +      if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
 +              idx -= INTEL_PMC_IDX_FIXED;
 +              idx |= 1 << 30;
 +      }
 +
 +      return idx + 1;
 +}
 +
 +/*
 + * "show" handler of the rdpmc device attribute: prints the current
 + * x86_pmu.attr_rdpmc value as a decimal number followed by a newline
 + * and returns snprintf()'s result.
 + */
 +static ssize_t get_attr_rdpmc(struct device *cdev,
 +                            struct device_attribute *attr,
 +                            char *buf)
 +{
 +      return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
 +}
 +
 +static ssize_t set_attr_rdpmc(struct device *cdev,
 +                            struct device_attribute *attr,
 +                            const char *buf, size_t count)
 +{
 +      unsigned long val;
 +      ssize_t ret;
 +
 +      ret = kstrtoul(buf, 0, &val);
 +      if (ret)
 +              return ret;
 +
 +      if (val > 2)
 +              return -EINVAL;
 +
 +      if (x86_pmu.attr_rdpmc_broken)
 +              return -ENOTSUPP;
 +
 +      if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
 +              /*
 +               * Changing into or out of always available, aka
 +               * perf-event-bypassing mode.  This path is extremely slow,
 +               * but only root can trigger it, so it's okay.
 +               */
 +              if (val == 2)
 +                      static_key_slow_inc(&rdpmc_always_available);
 +              else
 +                      static_key_slow_dec(&rdpmc_always_available);
 +              on_each_cpu(refresh_pce, NULL, 1);
 +      }
 +
 +      x86_pmu.attr_rdpmc = val;
 +
 +      return count;
 +}
 +
 +/* "rdpmc" attribute: owner read/write, backed by get/set_attr_rdpmc() */
 +static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
 +
 +/* NULL-terminated list of the PMU's own attributes (just rdpmc) */
 +static struct attribute *x86_pmu_attrs[] = {
 +      &dev_attr_rdpmc.attr,
 +      NULL,
 +};
 +
 +static struct attribute_group x86_pmu_attr_group = {
 +      .attrs = x86_pmu_attrs,
 +};
 +
 +/*
 + * NULL-terminated list of all attribute groups hooked into the "pmu"
 + * instance through its ->attr_groups member.
 + */
 +static const struct attribute_group *x86_pmu_attr_groups[] = {
 +      &x86_pmu_attr_group,
 +      &x86_pmu_format_group,
 +      &x86_pmu_events_group,
 +      NULL,
 +};
 +
 +/*
 + * ->sched_task callback: forwards to the model-specific
 + * x86_pmu.sched_task hook when one is installed, otherwise a no-op.
 + */
 +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 +{
 +      if (x86_pmu.sched_task)
 +              x86_pmu.sched_task(ctx, sched_in);
 +}
 +
 +/*
 + * Exported entry point that invokes the model-specific
 + * x86_pmu.check_microcode hook when one is installed; does nothing
 + * otherwise.
 + */
 +void perf_check_microcode(void)
 +{
 +      if (x86_pmu.check_microcode)
 +              x86_pmu.check_microcode();
 +}
 +EXPORT_SYMBOL_GPL(perf_check_microcode);
 +
 +/*
 + * The struct pmu instance for the x86 core PMU; it wires the
 + * x86_pmu_*() callbacks defined in this file into the perf core.
 + */
 +static struct pmu pmu = {
 +      .pmu_enable             = x86_pmu_enable,
 +      .pmu_disable            = x86_pmu_disable,
 +
 +      .attr_groups            = x86_pmu_attr_groups,
 +
 +      .event_init             = x86_pmu_event_init,
 +
 +      /* keep the per-mm RDPMC permission count in sync */
 +      .event_mapped           = x86_pmu_event_mapped,
 +      .event_unmapped         = x86_pmu_event_unmapped,
 +
 +      .add                    = x86_pmu_add,
 +      .del                    = x86_pmu_del,
 +      .start                  = x86_pmu_start,
 +      .stop                   = x86_pmu_stop,
 +      .read                   = x86_pmu_read,
 +
 +      /* group scheduling transactions */
 +      .start_txn              = x86_pmu_start_txn,
 +      .cancel_txn             = x86_pmu_cancel_txn,
 +      .commit_txn             = x86_pmu_commit_txn,
 +
 +      .event_idx              = x86_pmu_event_idx,
 +      .sched_task             = x86_pmu_sched_task,
 +      .task_ctx_size          = sizeof(struct x86_perf_task_context),
 +};
 +
 +/*
 + * Fill in the x86-specific fields of an event's user page.
 + *
 + * cap_user_rdpmc reflects whether the event is allowed to use RDPMC,
 + * and pmc_width is set to the counter width.  The time conversion
 + * fields are only provided (and cap_user_time set) when the sched
 + * clock is stable; cap_user_time_zero is additionally set only for
 + * events whose clock is local_clock.
 + */
 +void arch_perf_update_userpage(struct perf_event *event,
 +                             struct perf_event_mmap_page *userpg, u64 now)
 +{
 +      struct cyc2ns_data *data;
 +
 +      /* start from "no user-space time support" and enable below */
 +      userpg->cap_user_time = 0;
 +      userpg->cap_user_time_zero = 0;
 +      userpg->cap_user_rdpmc =
 +              !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 +      userpg->pmc_width = x86_pmu.cntval_bits;
 +
 +      if (!sched_clock_stable())
 +              return;
 +
 +      /* paired with cyc2ns_read_end() at the bottom */
 +      data = cyc2ns_read_begin();
 +
 +      /*
 +       * Internal timekeeping for enabled/running/stopped times
 +       * is always in the local_clock domain.
 +       */
 +      userpg->cap_user_time = 1;
 +      userpg->time_mult = data->cyc2ns_mul;
 +      userpg->time_shift = data->cyc2ns_shift;
 +      userpg->time_offset = data->cyc2ns_offset - now;
 +
 +      /*
 +       * cap_user_time_zero doesn't make sense when we're using a different
 +       * time base for the records.
 +       */
 +      if (event->clock == &local_clock) {
 +              userpg->cap_user_time_zero = 1;
 +              userpg->time_zero = data->cyc2ns_offset;
 +      }
 +
 +      cyc2ns_read_end(data);
 +}
 +
 +/*
 + * callchain support
 + */
 +
 +/*
 + * ->stack hook of backtrace_ops: ignores both arguments and always
 + * returns 0.
 + */
 +static int backtrace_stack(void *data, char *name)
 +{
 +      return 0;
 +}
 +
-       perf_callchain_store(entry, addr);
++static int backtrace_address(void *data, unsigned long addr, int reliable)
 +{
 +      /* @data is the callchain entry handed to dump_trace() */
 +      struct perf_callchain_entry *entry = data;
 +
 +      /* record the address; the store's result is passed back as-is */
++      return perf_callchain_store(entry, addr);
 +}
 +
 +/* Stack walker callbacks handed to dump_trace() by perf_callchain_kernel() */
 +static const struct stacktrace_ops backtrace_ops = {
 +      .stack                  = backtrace_stack,
 +      .address                = backtrace_address,
 +      .walk_stack             = print_context_stack_bp,
 +};
 +
 +/*
 + * Record the kernel-side callchain for @regs into @entry: first the
 + * instruction pointer itself, then whatever dump_trace() reports
 + * through backtrace_ops.  Nothing is recorded while running in a guest.
 + */
 +void
 +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 +{
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              /* TODO: We don't support guest os callchain now */
 +              return;
 +      }
 +
 +      perf_callchain_store(entry, regs->ip);
 +
 +      dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 +}
 +
 +/*
 + * Returns non-zero when __range_not_ok() reports 0 for the @size bytes
 + * at @fp, checked against the TASK_SIZE limit; returns 0 otherwise.
 + */
 +static inline int
 +valid_user_frame(const void __user *fp, unsigned long size)
 +{
 +      return (__range_not_ok(fp, size, TASK_SIZE) == 0);
 +}
 +
 +static unsigned long get_segment_base(unsigned int segment)
 +{
 +      struct desc_struct *desc;
 +      int idx = segment >> 3;
 +
 +      if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
 +#ifdef CONFIG_MODIFY_LDT_SYSCALL
 +              struct ldt_struct *ldt;
 +
 +              if (idx > LDT_ENTRIES)
 +                      return 0;
 +
 +              /* IRQs are off, so this synchronizes with smp_store_release */
 +              ldt = lockless_dereference(current->active_mm->context.ldt);
 +              if (!ldt || idx > ldt->size)
 +                      return 0;
 +
 +              desc = &ldt->entries[idx];
 +#else
 +              return 0;
 +#endif
 +      } else {
 +              if (idx > GDT_ENTRIES)
 +                      return 0;
 +
 +              desc = raw_cpu_ptr(gdt_page.gdt) + idx;
 +      }
 +
 +      return get_desc_base(desc);
 +}
 +
 +#ifdef CONFIG_IA32_EMULATION
 +
 +#include <asm/compat.h>
 +
 +/*
 + * Walk the user stack of a 32-bit task running on a 64-bit kernel.
 + *
 + * Returns 0 without touching @entry when the current thread does not
 + * have TIF_IA32 set, so the caller can fall back to the native walk.
 + * Otherwise follows the frame chain starting at regs->bp, storing one
 + * return address per frame, and returns 1.
 + *
 + * Each frame is read as two 4-byte words: the next frame pointer at fp
 + * and the return address at fp + 4.  The SS base is added to frame
 + * pointers and the CS base to return addresses.  The walk stops when
 + * the entry is full, the frame is not accessible, a copy fails or
 + * valid_user_frame() rejects the frame.
 + */
 +static inline int
 +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 +{
 +      /* 32-bit process in 64-bit kernel. */
 +      unsigned long ss_base, cs_base;
 +      struct stack_frame_ia32 frame;
 +      const void __user *fp;
 +
 +      if (!test_thread_flag(TIF_IA32))
 +              return 0;
 +
 +      cs_base = get_segment_base(regs->cs);
 +      ss_base = get_segment_base(regs->ss);
 +
 +      fp = compat_ptr(ss_base + regs->bp);
 +      /* user memory is read with page faults disabled from here on */
 +      pagefault_disable();
 +      while (entry->nr < PERF_MAX_STACK_DEPTH) {
 +              unsigned long bytes;
 +              frame.next_frame     = 0;
 +              frame.return_address = 0;
 +
 +              if (!access_ok(VERIFY_READ, fp, 8))
 +                      break;
 +
 +              /* a non-zero result is treated as a failed copy */
 +              bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
 +              if (bytes != 0)
 +                      break;
 +              bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
 +              if (bytes != 0)
 +                      break;
 +
 +              if (!valid_user_frame(fp, sizeof(frame)))
 +                      break;
 +
 +              perf_callchain_store(entry, cs_base + frame.return_address);
 +              fp = compat_ptr(ss_base + frame.next_frame);
 +      }
 +      pagefault_enable();
 +      return 1;
 +}
 +#else
 +/*
 + * Stub used without CONFIG_IA32_EMULATION: always returns 0, so that
 + * perf_callchain_user() goes on to its own stack walk.
 + */
 +static inline int
 +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 +{
 +    return 0;
 +}
 +#endif
 +
 +/*
 + * Record the user-space callchain for @regs into @entry.
 + *
 + * Nothing is recorded while running in a guest or for VM86 register
 + * state.  Otherwise regs->ip is stored first; then, if the current task
 + * has an mm, the frame chain starting at regs->bp is followed.  The
 + * walk is handed to perf_callchain_user32() first; only when that
 + * returns 0 is the walk below used, which reads each frame as two
 + * 8-byte words: the next frame pointer at fp and the return address at
 + * fp + 8.
 + */
 +void
 +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 +{
 +      struct stack_frame frame;
 +      const void __user *fp;
 +
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              /* TODO: We don't support guest os callchain now */
 +              return;
 +      }
 +
 +      /*
 +       * We don't know what to do with VM86 stacks.. ignore them for now.
 +       */
 +      if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
 +              return;
 +
 +      fp = (void __user *)regs->bp;
 +
 +      perf_callchain_store(entry, regs->ip);
 +
 +      /* without an mm there is no user stack to walk */
 +      if (!current->mm)
 +              return;
 +
 +      if (perf_callchain_user32(regs, entry))
 +              return;
 +
 +      /* user memory is read with page faults disabled from here on */
 +      pagefault_disable();
 +      while (entry->nr < PERF_MAX_STACK_DEPTH) {
 +              unsigned long bytes;
 +              frame.next_frame             = NULL;
 +              frame.return_address = 0;
 +
 +              if (!access_ok(VERIFY_READ, fp, 16))
 +                      break;
 +
 +              /* a non-zero result is treated as a failed copy */
 +              bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
 +              if (bytes != 0)
 +                      break;
 +              bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
 +              if (bytes != 0)
 +                      break;
 +
 +              if (!valid_user_frame(fp, sizeof(frame)))
 +                      break;
 +
 +              perf_callchain_store(entry, frame.return_address);
 +              fp = (void __user *)frame.next_frame;
 +      }
 +      pagefault_enable();
 +}
 +
 +/*
 + * Deal with code segment offsets for the various execution modes:
 + *
 + *   VM86 - the good olde 16 bit days, where the linear address is
 + *          20 bits and we use regs->ip + 0x10 * regs->cs.
 + *
 + *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
 + *          to figure out what the 32bit base address is.
 + *
 + *    X32 - has TIF_X32 set, but is running in x86_64
 + *
 + * X86_64 - CS,DS,SS,ES are all zero based.
 + *
 + * Returns the value to add to regs->ip to obtain a linear address;
 + * 0 whenever none of the special cases below applies.
 + */
 +static unsigned long code_segment_base(struct pt_regs *regs)
 +{
 +      /*
 +       * For IA32 we look at the GDT/LDT segment base to convert the
 +       * effective IP to a linear address.
 +       */
 +
 +#ifdef CONFIG_X86_32
 +      /*
 +       * If we are in VM86 mode, add the segment offset to convert to a
 +       * linear address.
 +       */
 +      if (regs->flags & X86_VM_MASK)
 +              return 0x10 * regs->cs;
 +
 +      /* user code running on a selector other than __USER_CS */
 +      if (user_mode(regs) && regs->cs != __USER_CS)
 +              return get_segment_base(regs->cs);
 +#else
 +      /* non-64-bit user code running on a selector other than __USER32_CS */
 +      if (user_mode(regs) && !user_64bit_mode(regs) &&
 +          regs->cs != __USER32_CS)
 +              return get_segment_base(regs->cs);
 +#endif
 +      return 0;
 +}
 +
 +/*
 + * Return the instruction pointer to report for @regs: the guest IP as
 + * supplied by perf_guest_cbs while in a guest, otherwise regs->ip plus
 + * the code segment base.
 + */
 +unsigned long perf_instruction_pointer(struct pt_regs *regs)
 +{
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
 +              return perf_guest_cbs->get_guest_ip();
 +
 +      return regs->ip + code_segment_base(regs);
 +}
 +
 +unsigned long perf_misc_flags(struct pt_regs *regs)
 +{
 +      int misc = 0;
 +
 +      if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 +              if (perf_guest_cbs->is_user_mode())
 +                      misc |= PERF_RECORD_MISC_GUEST_USER;
 +              else
 +                      misc |= PERF_RECORD_MISC_GUEST_KERNEL;
 +      } else {
 +              if (user_mode(regs))
 +                      misc |= PERF_RECORD_MISC_USER;
 +              else
 +                      misc |= PERF_RECORD_MISC_KERNEL;
 +      }
 +
 +      if (regs->flags & PERF_EFLAGS_EXACT)
 +              misc |= PERF_RECORD_MISC_EXACT_IP;
 +
 +      return misc;
 +}
 +
 +/*
 + * Copy the PMU's version, counter counts, counter width and events
 + * mask from x86_pmu into @cap.  The same cntval_bits value is reported
 + * for both the general-purpose and the fixed counter width.
 + */
 +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 +{
 +      cap->version            = x86_pmu.version;
 +      cap->num_counters_gp    = x86_pmu.num_counters;
 +      cap->num_counters_fixed = x86_pmu.num_counters_fixed;
 +      cap->bit_width_gp       = x86_pmu.cntval_bits;
 +      cap->bit_width_fixed    = x86_pmu.cntval_bits;
 +      cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
 +      cap->events_mask_len    = x86_pmu.events_mask_len;
 +}
 +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
Simple merge
Simple merge
Simple merge
index bf3446794bd5f5d3eb05229d3e57fdb10ed3f046,e848d708d2b733caf9edc9a0f0f0f1cdd60ad688..5121be4675d14de3ea8f7722434945caeb307eb9
  #define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
                           LEFTOVERS_MAX_FT)
  
- #define KERNEL_MAX_FT 2
- #define KERNEL_NUM_PRIOS 1
+ #define KERNEL_MAX_FT 3
+ #define KERNEL_NUM_PRIOS 2
  #define KENREL_MIN_LEVEL 2
  
 +#define ANCHOR_MAX_FT 1
 +#define ANCHOR_NUM_PRIOS 1
 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
  struct node_caps {
        size_t  arr_sz;
        long    *caps;
@@@ -367,13 -360,8 +367,13 @@@ static void del_rule(struct fs_node *no
        memcpy(match_value, fte->val, sizeof(fte->val));
        fs_get_obj(ft, fg->node.parent);
        list_del(&rule->node.list);
-       fte->dests_size--;
-       if (fte->dests_size) {
 +      if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 +              mutex_lock(&rule->dest_attr.ft->lock);
 +              list_del(&rule->next_ft);
 +              mutex_unlock(&rule->dest_attr.ft->lock);
 +      }
+       if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+           --fte->dests_size) {
                err = mlx5_cmd_update_fte(dev, ft,
                                          fg->id, fte);
                if (err)
@@@ -835,9 -762,9 +835,10 @@@ static struct mlx5_flow_rule *alloc_rul
        if (!rule)
                return NULL;
  
 +      INIT_LIST_HEAD(&rule->next_ft);
        rule->node.type = FS_TYPE_FLOW_DEST;
-       memcpy(&rule->dest_attr, dest, sizeof(*dest));
+       if (dest)
+               memcpy(&rule->dest_attr, dest, sizeof(*dest));
  
        return rule;
  }
@@@ -856,16 -783,12 +857,17 @@@ static struct mlx5_flow_rule *add_rule_
                return ERR_PTR(-ENOMEM);
  
        fs_get_obj(ft, fg->node.parent);
 -      /* Add dest to dests list- added as first element after the head */
 +      /* Add dest to dests list- we need flow tables to be in the
 +       * end of the list for forward to next prio rules.
 +       */
        tree_init_node(&rule->node, 1, del_rule);
 -      list_add_tail(&rule->node.list, &fte->node.children);
 +      if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
 +              list_add(&rule->node.list, &fte->node.children);
 +      else
 +              list_add_tail(&rule->node.list, &fte->node.children);
-       fte->dests_size++;
-       if (fte->dests_size == 1)
+       if (dest)
+               fte->dests_size++;
+       if (fte->dests_size == 1 || !dest)
                err = mlx5_cmd_create_fte(get_dev(&ft->node),
                                          ft, fg->id, fte);
        else
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc net/core/sock.c
Simple merge
Simple merge
diff --cc net/ipv4/tcp.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 0d96b48a64925840cfc2249e928ebdde51ac00a8,3106a0c4960be7db6e726ab70bf1ed6b891b9c2f..f0aeb8163688e6f4167874d9a6ac0f3bd270891d
@@@ -128,23 -128,21 +128,23 @@@ static void rxkad_prime_packet_security
        token = conn->key->payload.data[0];
        memcpy(&iv, token->kad->session_key, sizeof(iv));
  
-       tmpbuf.x[0] = conn->epoch;
-       tmpbuf.x[1] = conn->cid;
 -      desc.tfm = conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
 -
+       tmpbuf.x[0] = htonl(conn->epoch);
+       tmpbuf.x[1] = htonl(conn->cid);
        tmpbuf.x[2] = 0;
        tmpbuf.x[3] = htonl(conn->security_ix);
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
 +
 +      skcipher_request_set_tfm(req, conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  
        memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
-       ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
+       ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
  
        _leave("");
  }
@@@ -267,12 -251,12 +267,12 @@@ out
   * checksum an RxRPC packet header
   */
  static int rxkad_secure_packet(const struct rxrpc_call *call,
-                               struct sk_buff *skb,
-                               size_t data_size,
-                               void *sechdr)
+                              struct sk_buff *skb,
+                              size_t data_size,
+                              void *sechdr)
  {
        struct rxrpc_skb_priv *sp;
 -      struct blkcipher_desc desc;
 +      SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[2];
        struct {
  
        /* continue encrypting from where we left off */
        memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        /* calculate the security checksum */
-       x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
-       x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
-       tmpbuf.x[0] = sp->hdr.callNumber;
-       tmpbuf.x[1] = x;
+       x = call->channel << (32 - RXRPC_CIDSHIFT);
+       x |= sp->hdr.seq & 0x3fffffff;
+       tmpbuf.x[0] = htonl(sp->hdr.callNumber);
+       tmpbuf.x[1] = htonl(x);
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
@@@ -539,29 -513,25 +536,28 @@@ static int rxkad_verify_packet(const st
  
        /* continue encrypting from where we left off */
        memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 -      desc.tfm = call->conn->cipher;
 -      desc.info = iv.x;
 -      desc.flags = 0;
  
        /* validate the security checksum */
-       x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
-       x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
-       tmpbuf.x[0] = call->call_id;
-       tmpbuf.x[1] = x;
+       x = call->channel << (32 - RXRPC_CIDSHIFT);
+       x |= sp->hdr.seq & 0x3fffffff;
+       tmpbuf.x[0] = htonl(call->call_id);
+       tmpbuf.x[1] = htonl(x);
  
        sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
        sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
 -      crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
 +
 +      skcipher_request_set_tfm(req, call->conn->cipher);
 +      skcipher_request_set_callback(req, 0, NULL, NULL);
 +      skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
 +
 +      crypto_skcipher_encrypt(req);
 +      skcipher_request_zero(req);
  
        y = ntohl(tmpbuf.x[1]);
-       y = (y >> 16) & 0xffff;
-       if (y == 0)
-               y = 1; /* zero checksums are not permitted */
+       cksum = (y >> 16) & 0xffff;
+       if (cksum == 0)
+               cksum = 1; /* zero checksums are not permitted */
  
-       cksum = htons(y);
        if (sp->hdr.cksum != cksum) {
                *_abort_code = RXKADSEALEDINCON;
                _leave(" = -EPROTO [csum failed]");
Simple merge
Simple merge