lib: Add umoddi3 and udivmoddi4 GCC library routines
author Zong Li <zongbox@gmail.com>
Tue, 2 Oct 2018 08:52:29 +0000 (16:52 +0800)
committer Palmer Dabbelt <palmer@sifive.com>
Tue, 23 Oct 2018 00:02:56 +0000 (17:02 -0700)
Add umoddi3 and udivmoddi4 support for 32-bit targets.

RV32 needs umoddi3 to perform a modulo operation when the operands are
of long long type, just as it already relies on other libgcc-style
routines such as ucmpdi2, lshrdi3 and so on.
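
For illustration, a plain 64-bit unsigned modulo is all it takes for GCC
to emit that call on a 32-bit target. The function below is a
hypothetical stand-in, not from any real driver:

  /*
   * Hypothetical example: built with a 32-bit riscv toolchain, the
   * 64-bit modulo below is lowered to a call to __umoddi3, which
   * this patch now provides in the kernel.
   */
  unsigned long long ring_offset(unsigned long long pos,
                                 unsigned long long ring_size)
  {
          return pos % ring_size;         /* rv32 gcc: call __umoddi3 */
  }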

I ran into an undefined reference to 'umoddi3' while building an
in-house DMA driver. Even though that driver is out of tree, umoddi3
is a common helper that any RV32 build may end up needing.

The udivmoddi4 and umoddi3 implementations are copied from libgcc in
GCC. Other libgcc functions also build on udivmoddi4, so umoddi3 and
udivmoddi4 are kept in separate files to make future extension easier.
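
As a sketch of that possible reuse (not added by this patch; the name
and shape follow libgcc), __udivdi3 would be the quotient half of the
same routine:

  /*
   * Hypothetical future user: in libgcc, __udivdi3 is simply
   * __udivmoddi4 with the remainder pointer left NULL.
   */
  extern unsigned long long __udivmoddi4(unsigned long long u,
                                         unsigned long long v,
                                         unsigned long long *rp);

  unsigned long long __udivdi3(unsigned long long u, unsigned long long v)
  {
          return __udivmoddi4(u, v, (unsigned long long *)0);
  }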

Signed-off-by: Zong Li <zong@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
lib/Kconfig
lib/Makefile
lib/udivmoddi4.c [new file with mode: 0644]
lib/umoddi3.c [new file with mode: 0644]

index a3928d4438b5008c1fa01470de11245d1557bc33..d82f20609939552b6708b15d8969819d65cb13f7 100644 (file)
@@ -621,3 +621,6 @@ config GENERIC_LIB_CMPDI2
 
 config GENERIC_LIB_UCMPDI2
        bool
+
+config GENERIC_LIB_UMODDI3
+       bool
index 423876446810942b93c9ebba6ffb9bf086b69c8d..56a8d9c23ef3c89534e03b90aa0573fc7020925b 100644 (file)
@@ -270,3 +270,4 @@ obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o
 obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
 obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
 obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UMODDI3) += umoddi3.o udivmoddi4.o
diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c
new file mode 100644 (file)
index 0000000..c08bc8a
--- /dev/null
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/libgcc.h>
+
+#define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz(X))
+
+#define W_TYPE_SIZE 32
+
+#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)                                                \
+       do {                                                            \
+               unsigned long __x0, __x1, __x2, __x3;                   \
+               unsigned short __ul, __vl, __uh, __vh;                  \
+                                                                       \
+               __ul = __ll_lowpart(u);                                 \
+               __uh = __ll_highpart(u);                                \
+               __vl = __ll_lowpart(v);                                 \
+               __vh = __ll_highpart(v);                                \
+                                                                       \
+               __x0 = (unsigned long) __ul * __vl;                     \
+               __x1 = (unsigned long) __ul * __vh;                     \
+               __x2 = (unsigned long) __uh * __vl;                     \
+               __x3 = (unsigned long) __uh * __vh;                     \
+                                                                       \
+               __x1 += __ll_highpart(__x0);                            \
+               __x1 += __x2;                                           \
+               if (__x1 < __x2)                                        \
+                       __x3 += __ll_B;                                 \
+                                                                       \
+               (w1) = __x3 + __ll_highpart(__x1);                      \
+               (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+       } while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)                             \
+       do {                                                            \
+               unsigned long __x;                                      \
+               __x = (al) - (bl);                                      \
+               (sh) = (ah) - (bh) - (__x > (al));                      \
+               (sl) = __x;                                             \
+       } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d)                                        \
+       do {                                                            \
+               unsigned long __d1, __d0, __q1, __q0;                   \
+               unsigned long __r1, __r0, __m;                          \
+               __d1 = __ll_highpart(d);                                \
+               __d0 = __ll_lowpart(d);                                 \
+                                                                       \
+               __r1 = (n1) % __d1;                                     \
+               __q1 = (n1) / __d1;                                     \
+               __m = (unsigned long) __q1 * __d0;                      \
+               __r1 = __r1 * __ll_B | __ll_highpart(n0);               \
+               if (__r1 < __m) {                                       \
+                       __q1--, __r1 += (d);                            \
+                       if (__r1 >= (d))                                \
+                               if (__r1 < __m)                         \
+                                       __q1--, __r1 += (d);            \
+               }                                                       \
+               __r1 -= __m;                                            \
+                                                                       \
+               __r0 = __r1 % __d1;                                     \
+               __q0 = __r1 / __d1;                                     \
+               __m = (unsigned long) __q0 * __d0;                      \
+               __r0 = __r0 * __ll_B | __ll_lowpart(n0);                \
+               if (__r0 < __m) {                                       \
+                       __q0--, __r0 += (d);                            \
+                       if (__r0 >= (d))                                \
+                               if (__r0 < __m)                         \
+                                       __q0--, __r0 += (d);            \
+               }                                                       \
+               __r0 -= __m;                                            \
+                                                                       \
+               (q) = (unsigned long) __q1 * __ll_B | __q0;             \
+               (r) = __r0;                                             \
+       } while (0)
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v,
+                               unsigned long long *rp)
+{
+       const DWunion nn = {.ll = u };
+       const DWunion dd = {.ll = v };
+       DWunion rr, ww;
+       unsigned long d0, d1, n0, n1, n2;
+       unsigned long q0 = 0, q1 = 0;
+       unsigned long b, bm;
+
+       d0 = dd.s.low;
+       d1 = dd.s.high;
+       n0 = nn.s.low;
+       n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+
+       if (d1 == 0) {
+               if (d0 > n1) {
+                       /* 0q = nn / 0D */
+
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+                       q1 = 0;
+
+                       /* Remainder in n0. */
+               } else {
+                       /* qq = NN / 0d */
+
+                       if (d0 == 0)
+                               /* Divide intentionally by zero. */
+                               d0 = 1 / d0;
+
+                       udiv_qrnnd(q1, n1, 0, n1, d0);
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+
+                       /* Remainder in n0. */
+               }
+
+               if (rp != 0) {
+                       rr.s.low = n0;
+                       rr.s.high = 0;
+                       *rp = rr.ll;
+               }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+       if (d1 == 0) {
+               if (d0 > n1) {
+                       /* 0q = nn / 0D */
+
+                       count_leading_zeros(bm, d0);
+
+                       if (bm != 0) {
+                               /*
+                                * Normalize, i.e. make the most significant bit
+                                * of the denominator set.
+                                */
+
+                               d0 = d0 << bm;
+                               n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+                               n0 = n0 << bm;
+                       }
+
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+                       q1 = 0;
+
+                       /* Remainder in n0 >> bm. */
+               } else {
+                       /* qq = NN / 0d */
+
+                       if (d0 == 0)
+                               /* Divide intentionally by zero. */
+                               d0 = 1 / d0;
+
+                       count_leading_zeros(bm, d0);
+
+                       if (bm == 0) {
+                               /*
+                                * From (n1 >= d0) /\ (the most significant bit
+                                * of d0 is set), conclude (the most significant
+                                * bit of n1 is set) /\ (the leading quotient
+                                * digit q1 = 1).
+                                *
+                                * This special case is necessary, not an
+                                * optimization. (Shifts counts of W_TYPE_SIZE
+                                * are undefined.)
+                                */
+
+                               n1 -= d0;
+                               q1 = 1;
+                       } else {
+                               /* Normalize. */
+
+                               b = W_TYPE_SIZE - bm;
+
+                               d0 = d0 << bm;
+                               n2 = n1 >> b;
+                               n1 = (n1 << bm) | (n0 >> b);
+                               n0 = n0 << bm;
+
+                               udiv_qrnnd(q1, n1, n2, n1, d0);
+                       }
+
+                       /* n1 != d0... */
+
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+
+                       /* Remainder in n0 >> bm. */
+               }
+
+               if (rp != 0) {
+                       rr.s.low = n0 >> bm;
+                       rr.s.high = 0;
+                       *rp = rr.ll;
+               }
+
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+       } else {
+               if (d1 > n1) {
+                       /* 00 = nn / DD */
+
+                       q0 = 0;
+                       q1 = 0;
+
+                       /* Remainder in n1n0. */
+                       if (rp != 0) {
+                               rr.s.low = n0;
+                               rr.s.high = n1;
+                               *rp = rr.ll;
+                       }
+               } else {
+                       /* 0q = NN / dd */
+
+                       count_leading_zeros(bm, d1);
+                       if (bm == 0) {
+                               /*
+                                * From (n1 >= d1) /\ (the most significant bit
+                                * of d1 is set), conclude (the most significant
+                                * bit of n1 is set) /\ (the quotient digit q0 =
+                                * 0 or 1).
+                                *
+                                * This special case is necessary, not an
+                                * optimization.
+                                */
+
+                               /*
+                                * The condition on the next line takes
+                                * advantage of that n1 >= d1 (true due to
+                                * program flow).
+                                */
+                               if (n1 > d1 || n0 >= d0) {
+                                       q0 = 1;
+                                       sub_ddmmss(n1, n0, n1, n0, d1, d0);
+                               } else {
+                                       q0 = 0;
+                               }
+
+                               q1 = 0;
+
+                               if (rp != 0) {
+                                       rr.s.low = n0;
+                                       rr.s.high = n1;
+                                       *rp = rr.ll;
+                               }
+                       } else {
+                               unsigned long m1, m0;
+                               /* Normalize. */
+
+                               b = W_TYPE_SIZE - bm;
+
+                               d1 = (d1 << bm) | (d0 >> b);
+                               d0 = d0 << bm;
+                               n2 = n1 >> b;
+                               n1 = (n1 << bm) | (n0 >> b);
+                               n0 = n0 << bm;
+
+                               udiv_qrnnd(q0, n1, n2, n1, d1);
+                               umul_ppmm(m1, m0, q0, d0);
+
+                               if (m1 > n1 || (m1 == n1 && m0 > n0)) {
+                                       q0--;
+                                       sub_ddmmss(m1, m0, m1, m0, d1, d0);
+                               }
+
+                               q1 = 0;
+
+                               /* Remainder in (n1n0 - m1m0) >> bm. */
+                               if (rp != 0) {
+                                       sub_ddmmss(n1, n0, n1, n0, m1, m0);
+                                       rr.s.low = (n1 << b) | (n0 >> bm);
+                                       rr.s.high = n1 >> bm;
+                                       *rp = rr.ll;
+                               }
+                       }
+               }
+       }
+
+       ww.s.low = q0;
+       ww.s.high = q1;
+
+       return ww.ll;
+}
diff --git a/lib/umoddi3.c b/lib/umoddi3.c
new file mode 100644 (file)
index 0000000..d7bbf0f
--- /dev/null
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/libgcc.h>
+
+extern unsigned long long __udivmoddi4(unsigned long long u,
+                                      unsigned long long v,
+                                      unsigned long long *rp);
+
+unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
+{
+       unsigned long long w;
+       (void)__udivmoddi4(u, v, &w);
+       return w;
+}
+EXPORT_SYMBOL(__umoddi3);
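
Since __udivmoddi4 returns the quotient and stores the remainder
through *rp, a quick way to sanity-check the pair is the division
identity u == q * v + r. The self-test below is a hypothetical sketch,
not part of this patch:

  #include <linux/init.h>
  #include <linux/errno.h>

  extern unsigned long long __udivmoddi4(unsigned long long u,
                                         unsigned long long v,
                                         unsigned long long *rp);

  static int __init udivmoddi4_selftest(void)
  {
          unsigned long long u = 0x123456789abcdef0ULL;
          unsigned long long v = 0x87654321ULL;
          unsigned long long r, q;

          q = __udivmoddi4(u, v, &r);

          /* Any correct result satisfies u == q * v + r with r < v. */
          return (q * v + r == u && r < v) ? 0 : -EINVAL;
  }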