kernel: sfp: re-attempt probing for phy sfp
authorJonas Gorski <jonas.gorski@gmail.com>
Wed, 4 Dec 2019 22:11:34 +0000 (23:11 +0100)
committerJonas Gorski <jonas.gorski@gmail.com>
Wed, 4 Dec 2019 22:30:35 +0000 (23:30 +0100)
Add patches retrying to probe the PHY to restore support for PHYs taking
longer to initialize without breaking modules without PHYs.

Patches taken from http://git.armlinux.org.uk/cgit/linux-arm.git/log/?h=phy

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
target/linux/generic/pending-4.19/756-net-sfp-use-a-definition-for-the-fault-recovery-atte.patch [new file with mode: 0644]
target/linux/generic/pending-4.19/757-net-sfp-rename-sm_retries.patch [new file with mode: 0644]
target/linux/generic/pending-4.19/758-net-sfp-error-handling-for-phy-probe.patch [new file with mode: 0644]
target/linux/generic/pending-4.19/759-net-sfp-re-attempt-probing-for-phy.patch [new file with mode: 0644]

diff --git a/target/linux/generic/pending-4.19/756-net-sfp-use-a-definition-for-the-fault-recovery-atte.patch b/target/linux/generic/pending-4.19/756-net-sfp-use-a-definition-for-the-fault-recovery-atte.patch
new file mode 100644 (file)
index 0000000..0e5005e
--- /dev/null
@@ -0,0 +1,55 @@
+From 4d6bfb6fbb00af38402db4d1ce464e22def9fd9e Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Thu, 28 Nov 2019 14:24:40 +0000
+Subject: [PATCH 1/4] net: sfp: use a definition for the fault recovery
+ attempts
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+---
+ drivers/net/phy/sfp.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/phy/sfp.c
++++ b/drivers/net/phy/sfp.c
+@@ -160,6 +160,14 @@ static const enum gpiod_flags gpio_flags
+ #define T_RESET_US    10
+ #define T_FAULT_RECOVER       msecs_to_jiffies(1000)
++/* N_FAULT_INIT is the number of recovery attempts at module initialisation
++ * time. If the TX_FAULT signal is not deasserted after this number of
++ * attempts at clearing it, we decide that the module is faulty.
++ * N_FAULT is the same but after the module has initialised.
++ */
++#define N_FAULT_INIT          5
++#define N_FAULT                       5
++
+ /* SFP module presence detection is poor: the three MOD DEF signals are
+  * the same length on the PCB, which means it's possible for MOD DEF 0 to
+  * connect before the I2C bus on MOD DEF 1/2.
+@@ -1803,7 +1811,7 @@ static void sfp_sm_main(struct sfp *sfp,
+               sfp_module_tx_enable(sfp);
+               /* Initialise the fault clearance retries */
+-              sfp->sm_retries = 5;
++              sfp->sm_retries = N_FAULT_INIT;
+               /* We need to check the TX_FAULT state, which is not defined
+                * while TX_DISABLE is asserted. The earliest we want to do
+@@ -1842,7 +1850,7 @@ static void sfp_sm_main(struct sfp *sfp,
+                        * there is a fault.
+                        */
+                       sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
+-                                   sfp->sm_retries == 5);
++                                   sfp->sm_retries == N_FAULT_INIT);
+               } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+       init_done:      /* TX_FAULT deasserted or we timed out with TX_FAULT
+                        * clear.  Probe for the PHY and check the LOS state.
+@@ -1855,7 +1863,7 @@ static void sfp_sm_main(struct sfp *sfp,
+                       sfp_sm_link_check_los(sfp);
+                       /* Reset the fault retry count */
+-                      sfp->sm_retries = 5;
++                      sfp->sm_retries = N_FAULT;
+               }
+               break;
diff --git a/target/linux/generic/pending-4.19/757-net-sfp-rename-sm_retries.patch b/target/linux/generic/pending-4.19/757-net-sfp-rename-sm_retries.patch
new file mode 100644 (file)
index 0000000..b347d2c
--- /dev/null
@@ -0,0 +1,60 @@
+From bfa3cbb01c7ea34d7369c9bd2ec1b2dc67082b04 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Mon, 2 Dec 2019 18:06:44 +0000
+Subject: [PATCH 2/4] net: sfp: rename sm_retries
+
+Rename sm_retries as sm_fault_retries, as this is what this member is
+tracking.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+---
+ drivers/net/phy/sfp.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/phy/sfp.c
++++ b/drivers/net/phy/sfp.c
+@@ -222,7 +222,7 @@ struct sfp {
+       unsigned char sm_mod_tries;
+       unsigned char sm_dev_state;
+       unsigned short sm_state;
+-      unsigned int sm_retries;
++      unsigned char sm_fault_retries;
+       struct sfp_eeprom_id id;
+       unsigned int module_power_mW;
+@@ -1414,7 +1414,7 @@ static bool sfp_los_event_inactive(struc
+ static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn)
+ {
+-      if (sfp->sm_retries && !--sfp->sm_retries) {
++      if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) {
+               dev_err(sfp->dev,
+                       "module persistently indicates fault, disabling\n");
+               sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0);
+@@ -1811,7 +1811,7 @@ static void sfp_sm_main(struct sfp *sfp,
+               sfp_module_tx_enable(sfp);
+               /* Initialise the fault clearance retries */
+-              sfp->sm_retries = N_FAULT_INIT;
++              sfp->sm_fault_retries = N_FAULT_INIT;
+               /* We need to check the TX_FAULT state, which is not defined
+                * while TX_DISABLE is asserted. The earliest we want to do
+@@ -1850,7 +1850,7 @@ static void sfp_sm_main(struct sfp *sfp,
+                        * there is a fault.
+                        */
+                       sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
+-                                   sfp->sm_retries == N_FAULT_INIT);
++                                   sfp->sm_fault_retries == N_FAULT_INIT);
+               } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+       init_done:      /* TX_FAULT deasserted or we timed out with TX_FAULT
+                        * clear.  Probe for the PHY and check the LOS state.
+@@ -1863,7 +1863,7 @@ static void sfp_sm_main(struct sfp *sfp,
+                       sfp_sm_link_check_los(sfp);
+                       /* Reset the fault retry count */
+-                      sfp->sm_retries = N_FAULT;
++                      sfp->sm_fault_retries = N_FAULT;
+               }
+               break;
diff --git a/target/linux/generic/pending-4.19/758-net-sfp-error-handling-for-phy-probe.patch b/target/linux/generic/pending-4.19/758-net-sfp-error-handling-for-phy-probe.patch
new file mode 100644 (file)
index 0000000..00896df
--- /dev/null
@@ -0,0 +1,97 @@
+From 1fba543dc8edf4a43bff3276306648bb27c1e207 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Fri, 29 Nov 2019 00:30:08 +0000
+Subject: [PATCH 3/4] net: sfp: error handling for phy probe
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+---
+ drivers/net/phy/sfp.c | 26 +++++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/phy/sfp.c
++++ b/drivers/net/phy/sfp.c
+@@ -1334,7 +1334,7 @@ static void sfp_sm_phy_detach(struct sfp
+       sfp->mod_phy = NULL;
+ }
+-static void sfp_sm_probe_phy(struct sfp *sfp, bool is_c45)
++static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45)
+ {
+       struct phy_device *phy;
+       int err;
+@@ -1342,18 +1342,18 @@ static void sfp_sm_probe_phy(struct sfp
+       phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45);
+       if (phy == ERR_PTR(-ENODEV)) {
+               dev_info(sfp->dev, "no PHY detected\n");
+-              return;
++              return 0;
+       }
+       if (IS_ERR(phy)) {
+               dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
+-              return;
++              return PTR_ERR(phy);
+       }
+       err = phy_device_register(phy);
+       if (err) {
+               phy_device_free(phy);
+               dev_err(sfp->dev, "phy_device_register failed: %d\n", err);
+-              return;
++              return err;
+       }
+       err = sfp_add_phy(sfp->sfp_bus, phy);
+@@ -1361,10 +1361,12 @@ static void sfp_sm_probe_phy(struct sfp
+               phy_device_remove(phy);
+               phy_device_free(phy);
+               dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err);
+-              return;
++              return err;
+       }
+       sfp->mod_phy = phy;
++
++      return 0;
+ }
+ static void sfp_sm_link_up(struct sfp *sfp)
+@@ -1437,21 +1439,24 @@ static void sfp_sm_fault(struct sfp *sfp
+  * Clause 45 copper SFP+ modules (10G) appear to switch their interface
+  * mode according to the negotiated line speed.
+  */
+-static void sfp_sm_probe_for_phy(struct sfp *sfp)
++static int sfp_sm_probe_for_phy(struct sfp *sfp)
+ {
++      int err = 0;
++
+       switch (sfp->id.base.extended_cc) {
+       case SFF8024_ECC_10GBASE_T_SFI:
+       case SFF8024_ECC_10GBASE_T_SR:
+       case SFF8024_ECC_5GBASE_T:
+       case SFF8024_ECC_2_5GBASE_T:
+-              sfp_sm_probe_phy(sfp, true);
++              err = sfp_sm_probe_phy(sfp, true);
+               break;
+       default:
+               if (sfp->id.base.e1000_base_t)
+-                      sfp_sm_probe_phy(sfp, false);
++                      err = sfp_sm_probe_phy(sfp, false);
+               break;
+       }
++      return err;
+ }
+ static int sfp_module_parse_power(struct sfp *sfp)
+@@ -1855,7 +1860,10 @@ static void sfp_sm_main(struct sfp *sfp,
+       init_done:      /* TX_FAULT deasserted or we timed out with TX_FAULT
+                        * clear.  Probe for the PHY and check the LOS state.
+                        */
+-                      sfp_sm_probe_for_phy(sfp);
++                      if (sfp_sm_probe_for_phy(sfp)) {
++                              sfp_sm_next(sfp, SFP_S_FAIL, 0);
++                              break;
++                      }
+                       if (sfp_module_start(sfp->sfp_bus)) {
+                               sfp_sm_next(sfp, SFP_S_FAIL, 0);
+                               break;
diff --git a/target/linux/generic/pending-4.19/759-net-sfp-re-attempt-probing-for-phy.patch b/target/linux/generic/pending-4.19/759-net-sfp-re-attempt-probing-for-phy.patch
new file mode 100644 (file)
index 0000000..526c796
--- /dev/null
@@ -0,0 +1,132 @@
+From 6c4efe83a0acf6f06c89ae17b885fa5739eb5be7 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Mon, 2 Dec 2019 18:20:22 +0000
+Subject: [PATCH 4/4] net: sfp: re-attempt probing for phy
+
+Some 1000BASE-T PHY modules take a while for the PHY to wake up.
+Retry the probe a number of times before deciding that the module has
+no PHY.
+
+Tested with:
+ Sourcephotonics SPGBTXCNFC - PHY takes less than 50ms to respond.
+ Champion One 1000SFPT - PHY takes about 200ms to respond.
+ Mikrotik S-RJ01 - no PHY
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+---
+ drivers/net/phy/sfp.c | 59 ++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 42 insertions(+), 17 deletions(-)
+
+--- a/drivers/net/phy/sfp.c
++++ b/drivers/net/phy/sfp.c
+@@ -60,6 +60,7 @@ enum {
+       SFP_S_FAIL,
+       SFP_S_WAIT,
+       SFP_S_INIT,
++      SFP_S_INIT_PHY,
+       SFP_S_INIT_TX_FAULT,
+       SFP_S_WAIT_LOS,
+       SFP_S_LINK_UP,
+@@ -124,6 +125,7 @@ static const char * const sm_state_strin
+       [SFP_S_FAIL] = "fail",
+       [SFP_S_WAIT] = "wait",
+       [SFP_S_INIT] = "init",
++      [SFP_S_INIT_PHY] = "init_phy",
+       [SFP_S_INIT_TX_FAULT] = "init_tx_fault",
+       [SFP_S_WAIT_LOS] = "wait_los",
+       [SFP_S_LINK_UP] = "link_up",
+@@ -168,6 +170,12 @@ static const enum gpiod_flags gpio_flags
+ #define N_FAULT_INIT          5
+ #define N_FAULT                       5
++/* T_PHY_RETRY is the time interval between attempts to probe the PHY.
++ * R_PHY_RETRY is the number of attempts.
++ */
++#define T_PHY_RETRY           msecs_to_jiffies(50)
++#define R_PHY_RETRY           12
++
+ /* SFP module presence detection is poor: the three MOD DEF signals are
+  * the same length on the PCB, which means it's possible for MOD DEF 0 to
+  * connect before the I2C bus on MOD DEF 1/2.
+@@ -223,6 +231,7 @@ struct sfp {
+       unsigned char sm_dev_state;
+       unsigned short sm_state;
+       unsigned char sm_fault_retries;
++      unsigned char sm_phy_retries;
+       struct sfp_eeprom_id id;
+       unsigned int module_power_mW;
+@@ -1340,10 +1349,8 @@ static int sfp_sm_probe_phy(struct sfp *
+       int err;
+       phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45);
+-      if (phy == ERR_PTR(-ENODEV)) {
+-              dev_info(sfp->dev, "no PHY detected\n");
+-              return 0;
+-      }
++      if (phy == ERR_PTR(-ENODEV))
++              return PTR_ERR(phy);
+       if (IS_ERR(phy)) {
+               dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
+               return PTR_ERR(phy);
+@@ -1785,6 +1792,7 @@ static void sfp_sm_module(struct sfp *sf
+ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
+ {
+       unsigned long timeout;
++      int ret;
+       /* Some events are global */
+       if (sfp->sm_state != SFP_S_DOWN &&
+@@ -1857,22 +1865,39 @@ static void sfp_sm_main(struct sfp *sfp,
+                       sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
+                                    sfp->sm_fault_retries == N_FAULT_INIT);
+               } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+-      init_done:      /* TX_FAULT deasserted or we timed out with TX_FAULT
+-                       * clear.  Probe for the PHY and check the LOS state.
+-                       */
+-                      if (sfp_sm_probe_for_phy(sfp)) {
+-                              sfp_sm_next(sfp, SFP_S_FAIL, 0);
+-                              break;
+-                      }
+-                      if (sfp_module_start(sfp->sfp_bus)) {
+-                              sfp_sm_next(sfp, SFP_S_FAIL, 0);
++      init_done:
++                      sfp->sm_phy_retries = R_PHY_RETRY;
++                      goto phy_probe;
++              }
++              break;
++
++      case SFP_S_INIT_PHY:
++              if (event != SFP_E_TIMEOUT)
++                      break;
++      phy_probe:
++              /* TX_FAULT deasserted or we timed out with TX_FAULT
++               * clear.  Probe for the PHY and check the LOS state.
++               */
++              ret = sfp_sm_probe_for_phy(sfp);
++              if (ret == -ENODEV) {
++                      if (--sfp->sm_phy_retries) {
++                              sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY);
+                               break;
++                      } else {
++                              dev_info(sfp->dev, "no PHY detected\n");
+                       }
+-                      sfp_sm_link_check_los(sfp);
+-
+-                      /* Reset the fault retry count */
+-                      sfp->sm_fault_retries = N_FAULT;
++              } else if (ret) {
++                      sfp_sm_next(sfp, SFP_S_FAIL, 0);
++                      break;
+               }
++              if (sfp_module_start(sfp->sfp_bus)) {
++                      sfp_sm_next(sfp, SFP_S_FAIL, 0);
++                      break;
++              }
++              sfp_sm_link_check_los(sfp);
++
++              /* Reset the fault retry count */
++              sfp->sm_fault_retries = N_FAULT;
+               break;
+       case SFP_S_INIT_TX_FAULT: