- 'FWHT'
- Video elementary stream using a codec based on the Fast Walsh Hadamard
Transform. This codec is implemented by the vicodec ('Virtual Codec')
- driver. See the vicodec-codec.h header for more details.
+ driver. See the codec-fwht.h header for more details.
# SPDX-License-Identifier: GPL-2.0
-vicodec-objs := vicodec-core.o vicodec-codec.o
+vicodec-objs := vicodec-core.o codec-fwht.o
obj-$(CONFIG_VIDEO_VICODEC) += vicodec.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2016 Tom aan de Wiel
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
+ *
+ * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
+ * R.D. Brown, 1977
+ */
+
+#include <linux/string.h>
+#include "codec-fwht.h"
+
+/*
+ * Note: bit 0 of the header must always be 0. Otherwise it cannot
+ * be guaranteed that the magic 8 byte sequence (see below) can
+ * never occur in the rlc output.
+ */
+#define PFRAME_BIT BIT(15)
+#define DUPS_MASK 0x1ffe
+
+#define PBLOCK 0
+#define IBLOCK 1
+
+#define ALL_ZEROS 15
+
+static const uint8_t zigzag[64] = {
+ 0,
+ 1, 8,
+ 2, 9, 16,
+ 3, 10, 17, 24,
+ 4, 11, 18, 25, 32,
+ 5, 12, 19, 26, 33, 40,
+ 6, 13, 20, 27, 34, 41, 48,
+ 7, 14, 21, 28, 35, 42, 49, 56,
+ 15, 22, 29, 36, 43, 50, 57,
+ 23, 30, 37, 44, 51, 58,
+ 31, 38, 45, 52, 59,
+ 39, 46, 53, 60,
+ 47, 54, 61,
+ 55, 62,
+ 63,
+};
+
+
+static int rlc(const s16 *in, __be16 *output, int blocktype)
+{
+ s16 block[8 * 8];
+ s16 *wp = block;
+ int i = 0;
+ int x, y;
+ int ret = 0;
+
+ /* read in block from framebuffer */
+ int lastzero_run = 0;
+ int to_encode;
+
+ for (y = 0; y < 8; y++) {
+ for (x = 0; x < 8; x++) {
+ *wp = in[x + y * 8];
+ wp++;
+ }
+ }
+
+ /* keep track of amount of trailing zeros */
+ for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
+ lastzero_run++;
+
+ *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
+ ret++;
+
+ to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
+
+ i = 0;
+ while (i < to_encode) {
+ int cnt = 0;
+ int tmp;
+
+ /* count leading zeros */
+ while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
+ cnt++;
+ i++;
+ if (i == to_encode) {
+ cnt--;
+ break;
+ }
+ }
+ /* 4 bits for run, 12 for coefficient (quantization by 4) */
+ *output++ = htons((cnt | tmp << 4));
+ i++;
+ ret++;
+ }
+ if (lastzero_run > 14) {
+ *output = htons(ALL_ZEROS | 0);
+ ret++;
+ }
+
+ return ret;
+}
+
+/*
+ * This function will worst-case increase rlc_in by 65*2 bytes:
+ * one s16 value for the header and 8 * 8 coefficients of type s16.
+ */
+static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
+{
+ /* header */
+ const __be16 *input = *rlc_in;
+ s16 ret = ntohs(*input++);
+ int dec_count = 0;
+ s16 block[8 * 8 + 16];
+ s16 *wp = block;
+ int i;
+
+ /*
+	 * Now de-compress: each 16-bit code word expands to up to 15
+	 * s16 values (or fills the remainder of the 64 values with
+	 * zeroes if it is the last code word to expand).
+	 *
+	 * So block has to hold 8 * 8 + 16 values, the '+ 16' is to
+ * allow for overflow if the incoming data was malformed.
+ */
+ while (dec_count < 8 * 8) {
+ s16 in = ntohs(*input++);
+ int length = in & 0xf;
+ int coeff = in >> 4;
+
+ /* fill remainder with zeros */
+ if (length == 15) {
+ for (i = 0; i < 64 - dec_count; i++)
+ *wp++ = 0;
+ break;
+ }
+
+ for (i = 0; i < length; i++)
+ *wp++ = 0;
+ *wp++ = coeff;
+ dec_count += length + 1;
+ }
+
+ wp = block;
+
+ for (i = 0; i < 64; i++) {
+ int pos = zigzag[i];
+ int y = pos / 8;
+ int x = pos % 8;
+
+ dwht_out[x + y * 8] = *wp++;
+ }
+ *rlc_in = input;
+ return ret;
+}
+
+static const int quant_table[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3,
+ 2, 2, 2, 2, 2, 2, 3, 6,
+ 2, 2, 2, 2, 2, 3, 6, 6,
+ 2, 2, 2, 2, 3, 6, 6, 6,
+ 2, 2, 2, 3, 6, 6, 6, 6,
+ 2, 2, 3, 6, 6, 6, 6, 8,
+};
+
+static const int quant_table_p[] = {
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 6,
+ 3, 3, 3, 3, 3, 3, 6, 6,
+ 3, 3, 3, 3, 3, 6, 6, 9,
+ 3, 3, 3, 3, 6, 6, 9, 9,
+ 3, 3, 3, 6, 6, 9, 9, 10,
+};
+
+static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
+{
+ const int *quant = quant_table;
+ int i, j;
+
+ for (j = 0; j < 8; j++) {
+ for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
+ *coeff >>= *quant;
+ if (*coeff >= -qp && *coeff <= qp)
+ *coeff = *de_coeff = 0;
+ else
+ *de_coeff = *coeff << *quant;
+ }
+ }
+}
+
+static void dequantize_intra(s16 *coeff)
+{
+ const int *quant = quant_table;
+ int i, j;
+
+ for (j = 0; j < 8; j++)
+ for (i = 0; i < 8; i++, quant++, coeff++)
+ *coeff <<= *quant;
+}
+
+static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
+{
+ const int *quant = quant_table_p;
+ int i, j;
+
+ for (j = 0; j < 8; j++) {
+ for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
+ *coeff >>= *quant;
+ if (*coeff >= -qp && *coeff <= qp)
+ *coeff = *de_coeff = 0;
+ else
+ *de_coeff = *coeff << *quant;
+ }
+ }
+}
+
+static void dequantize_inter(s16 *coeff)
+{
+ const int *quant = quant_table_p;
+ int i, j;
+
+ for (j = 0; j < 8; j++)
+ for (i = 0; i < 8; i++, quant++, coeff++)
+ *coeff <<= *quant;
+}
+
+static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
+ unsigned int input_step, bool intra)
+{
+ /* we'll need more than 8 bits for the transformed coefficients */
+ s32 workspace1[8], workspace2[8];
+ const u8 *tmp = block;
+ s16 *out = output_block;
+ int add = intra ? 256 : 0;
+ unsigned int i;
+
+ /* stage 1 */
+ stride *= input_step;
+
+ for (i = 0; i < 8; i++, tmp += stride, out += 8) {
+ switch (input_step) {
+ case 1:
+ workspace1[0] = tmp[0] + tmp[1] - add;
+ workspace1[1] = tmp[0] - tmp[1];
+
+ workspace1[2] = tmp[2] + tmp[3] - add;
+ workspace1[3] = tmp[2] - tmp[3];
+
+ workspace1[4] = tmp[4] + tmp[5] - add;
+ workspace1[5] = tmp[4] - tmp[5];
+
+ workspace1[6] = tmp[6] + tmp[7] - add;
+ workspace1[7] = tmp[6] - tmp[7];
+ break;
+ case 2:
+ workspace1[0] = tmp[0] + tmp[2] - add;
+ workspace1[1] = tmp[0] - tmp[2];
+
+ workspace1[2] = tmp[4] + tmp[6] - add;
+ workspace1[3] = tmp[4] - tmp[6];
+
+ workspace1[4] = tmp[8] + tmp[10] - add;
+ workspace1[5] = tmp[8] - tmp[10];
+
+ workspace1[6] = tmp[12] + tmp[14] - add;
+ workspace1[7] = tmp[12] - tmp[14];
+ break;
+ case 3:
+ workspace1[0] = tmp[0] + tmp[3] - add;
+ workspace1[1] = tmp[0] - tmp[3];
+
+ workspace1[2] = tmp[6] + tmp[9] - add;
+ workspace1[3] = tmp[6] - tmp[9];
+
+ workspace1[4] = tmp[12] + tmp[15] - add;
+ workspace1[5] = tmp[12] - tmp[15];
+
+ workspace1[6] = tmp[18] + tmp[21] - add;
+ workspace1[7] = tmp[18] - tmp[21];
+ break;
+ default:
+ workspace1[0] = tmp[0] + tmp[4] - add;
+ workspace1[1] = tmp[0] - tmp[4];
+
+ workspace1[2] = tmp[8] + tmp[12] - add;
+ workspace1[3] = tmp[8] - tmp[12];
+
+ workspace1[4] = tmp[16] + tmp[20] - add;
+ workspace1[5] = tmp[16] - tmp[20];
+
+ workspace1[6] = tmp[24] + tmp[28] - add;
+ workspace1[7] = tmp[24] - tmp[28];
+ break;
+ }
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0] = workspace2[0] + workspace2[4];
+ out[1] = workspace2[0] - workspace2[4];
+ out[2] = workspace2[1] - workspace2[5];
+ out[3] = workspace2[1] + workspace2[5];
+ out[4] = workspace2[2] + workspace2[6];
+ out[5] = workspace2[2] - workspace2[6];
+ out[6] = workspace2[3] - workspace2[7];
+ out[7] = workspace2[3] + workspace2[7];
+ }
+
+ out = output_block;
+
+ for (i = 0; i < 8; i++, out++) {
+ /* stage 1 */
+ workspace1[0] = out[0] + out[1 * 8];
+ workspace1[1] = out[0] - out[1 * 8];
+
+ workspace1[2] = out[2 * 8] + out[3 * 8];
+ workspace1[3] = out[2 * 8] - out[3 * 8];
+
+ workspace1[4] = out[4 * 8] + out[5 * 8];
+ workspace1[5] = out[4 * 8] - out[5 * 8];
+
+ workspace1[6] = out[6 * 8] + out[7 * 8];
+ workspace1[7] = out[6 * 8] - out[7 * 8];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+ /* stage 3 */
+ out[0 * 8] = workspace2[0] + workspace2[4];
+ out[1 * 8] = workspace2[0] - workspace2[4];
+ out[2 * 8] = workspace2[1] - workspace2[5];
+ out[3 * 8] = workspace2[1] + workspace2[5];
+ out[4 * 8] = workspace2[2] + workspace2[6];
+ out[5 * 8] = workspace2[2] - workspace2[6];
+ out[6 * 8] = workspace2[3] - workspace2[7];
+ out[7 * 8] = workspace2[3] + workspace2[7];
+ }
+}
+
+/*
+ * Not the nicest way of doing it, but P-blocks get twice the range of
+ * that of the I-blocks. Therefore we need a type bigger than 8 bits.
+ * Furthermore values can be negative... This is just a version that
+ * works with 16-bit signed data
+ */
+static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
+{
+ /* we'll need more than 8 bits for the transformed coefficients */
+ s32 workspace1[8], workspace2[8];
+ const s16 *tmp = block;
+ s16 *out = output_block;
+ int i;
+
+ for (i = 0; i < 8; i++, tmp += stride, out += 8) {
+ /* stage 1 */
+ workspace1[0] = tmp[0] + tmp[1];
+ workspace1[1] = tmp[0] - tmp[1];
+
+ workspace1[2] = tmp[2] + tmp[3];
+ workspace1[3] = tmp[2] - tmp[3];
+
+ workspace1[4] = tmp[4] + tmp[5];
+ workspace1[5] = tmp[4] - tmp[5];
+
+ workspace1[6] = tmp[6] + tmp[7];
+ workspace1[7] = tmp[6] - tmp[7];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0] = workspace2[0] + workspace2[4];
+ out[1] = workspace2[0] - workspace2[4];
+ out[2] = workspace2[1] - workspace2[5];
+ out[3] = workspace2[1] + workspace2[5];
+ out[4] = workspace2[2] + workspace2[6];
+ out[5] = workspace2[2] - workspace2[6];
+ out[6] = workspace2[3] - workspace2[7];
+ out[7] = workspace2[3] + workspace2[7];
+ }
+
+ out = output_block;
+
+ for (i = 0; i < 8; i++, out++) {
+ /* stage 1 */
+ workspace1[0] = out[0] + out[1*8];
+ workspace1[1] = out[0] - out[1*8];
+
+ workspace1[2] = out[2*8] + out[3*8];
+ workspace1[3] = out[2*8] - out[3*8];
+
+ workspace1[4] = out[4*8] + out[5*8];
+ workspace1[5] = out[4*8] - out[5*8];
+
+ workspace1[6] = out[6*8] + out[7*8];
+ workspace1[7] = out[6*8] - out[7*8];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0*8] = workspace2[0] + workspace2[4];
+ out[1*8] = workspace2[0] - workspace2[4];
+ out[2*8] = workspace2[1] - workspace2[5];
+ out[3*8] = workspace2[1] + workspace2[5];
+ out[4*8] = workspace2[2] + workspace2[6];
+ out[5*8] = workspace2[2] - workspace2[6];
+ out[6*8] = workspace2[3] - workspace2[7];
+ out[7*8] = workspace2[3] + workspace2[7];
+ }
+}
+
+static void ifwht(const s16 *block, s16 *output_block, int intra)
+{
+ /*
+ * we'll need more than 8 bits for the transformed coefficients
+ * use native unit of cpu
+ */
+ int workspace1[8], workspace2[8];
+ int inter = intra ? 0 : 1;
+ const s16 *tmp = block;
+ s16 *out = output_block;
+ int i;
+
+ for (i = 0; i < 8; i++, tmp += 8, out += 8) {
+ /* stage 1 */
+ workspace1[0] = tmp[0] + tmp[1];
+ workspace1[1] = tmp[0] - tmp[1];
+
+ workspace1[2] = tmp[2] + tmp[3];
+ workspace1[3] = tmp[2] - tmp[3];
+
+ workspace1[4] = tmp[4] + tmp[5];
+ workspace1[5] = tmp[4] - tmp[5];
+
+ workspace1[6] = tmp[6] + tmp[7];
+ workspace1[7] = tmp[6] - tmp[7];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ out[0] = workspace2[0] + workspace2[4];
+ out[1] = workspace2[0] - workspace2[4];
+ out[2] = workspace2[1] - workspace2[5];
+ out[3] = workspace2[1] + workspace2[5];
+ out[4] = workspace2[2] + workspace2[6];
+ out[5] = workspace2[2] - workspace2[6];
+ out[6] = workspace2[3] - workspace2[7];
+ out[7] = workspace2[3] + workspace2[7];
+ }
+
+ out = output_block;
+
+ for (i = 0; i < 8; i++, out++) {
+ /* stage 1 */
+ workspace1[0] = out[0] + out[1 * 8];
+ workspace1[1] = out[0] - out[1 * 8];
+
+ workspace1[2] = out[2 * 8] + out[3 * 8];
+ workspace1[3] = out[2 * 8] - out[3 * 8];
+
+ workspace1[4] = out[4 * 8] + out[5 * 8];
+ workspace1[5] = out[4 * 8] - out[5 * 8];
+
+ workspace1[6] = out[6 * 8] + out[7 * 8];
+ workspace1[7] = out[6 * 8] - out[7 * 8];
+
+ /* stage 2 */
+ workspace2[0] = workspace1[0] + workspace1[2];
+ workspace2[1] = workspace1[0] - workspace1[2];
+ workspace2[2] = workspace1[1] - workspace1[3];
+ workspace2[3] = workspace1[1] + workspace1[3];
+
+ workspace2[4] = workspace1[4] + workspace1[6];
+ workspace2[5] = workspace1[4] - workspace1[6];
+ workspace2[6] = workspace1[5] - workspace1[7];
+ workspace2[7] = workspace1[5] + workspace1[7];
+
+ /* stage 3 */
+ if (inter) {
+ int d;
+
+ out[0 * 8] = workspace2[0] + workspace2[4];
+ out[1 * 8] = workspace2[0] - workspace2[4];
+ out[2 * 8] = workspace2[1] - workspace2[5];
+ out[3 * 8] = workspace2[1] + workspace2[5];
+ out[4 * 8] = workspace2[2] + workspace2[6];
+ out[5 * 8] = workspace2[2] - workspace2[6];
+ out[6 * 8] = workspace2[3] - workspace2[7];
+ out[7 * 8] = workspace2[3] + workspace2[7];
+
+ for (d = 0; d < 8; d++)
+ out[8 * d] >>= 6;
+ } else {
+ int d;
+
+ out[0 * 8] = workspace2[0] + workspace2[4];
+ out[1 * 8] = workspace2[0] - workspace2[4];
+ out[2 * 8] = workspace2[1] - workspace2[5];
+ out[3 * 8] = workspace2[1] + workspace2[5];
+ out[4 * 8] = workspace2[2] + workspace2[6];
+ out[5 * 8] = workspace2[2] - workspace2[6];
+ out[6 * 8] = workspace2[3] - workspace2[7];
+ out[7 * 8] = workspace2[3] + workspace2[7];
+
+ for (d = 0; d < 8; d++) {
+ out[8 * d] >>= 6;
+ out[8 * d] += 128;
+ }
+ }
+ }
+}
+
+static void fill_encoder_block(const u8 *input, s16 *dst,
+ unsigned int stride, unsigned int input_step)
+{
+ int i, j;
+
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++, input += input_step)
+ *dst++ = *input;
+ input += (stride - 8) * input_step;
+ }
+}
+
+static int var_intra(const s16 *input)
+{
+ int32_t mean = 0;
+ int32_t ret = 0;
+ const s16 *tmp = input;
+ int i;
+
+ for (i = 0; i < 8 * 8; i++, tmp++)
+ mean += *tmp;
+ mean /= 64;
+ tmp = input;
+ for (i = 0; i < 8 * 8; i++, tmp++)
+ ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
+ return ret;
+}
+
+static int var_inter(const s16 *old, const s16 *new)
+{
+ int32_t ret = 0;
+ int i;
+
+ for (i = 0; i < 8 * 8; i++, old++, new++)
+ ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
+ return ret;
+}
+
+static int decide_blocktype(const u8 *cur, const u8 *reference,
+ s16 *deltablock, unsigned int stride,
+ unsigned int input_step)
+{
+ s16 tmp[64];
+ s16 old[64];
+ s16 *work = tmp;
+ unsigned int k, l;
+ int vari;
+ int vard;
+
+ fill_encoder_block(cur, tmp, stride, input_step);
+ fill_encoder_block(reference, old, 8, 1);
+ vari = var_intra(tmp);
+
+ for (k = 0; k < 8; k++) {
+ for (l = 0; l < 8; l++) {
+ *deltablock = *work - *reference;
+ deltablock++;
+ work++;
+ reference++;
+ }
+ }
+ deltablock -= 64;
+ vard = var_inter(old, tmp);
+ return vari <= vard ? IBLOCK : PBLOCK;
+}
+
+static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
+{
+ int i, j;
+
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++)
+ *dst++ = *input++;
+ dst += stride - 8;
+ }
+}
+
+static void add_deltas(s16 *deltas, const u8 *ref, int stride)
+{
+ int k, l;
+
+ for (k = 0; k < 8; k++) {
+ for (l = 0; l < 8; l++) {
+ *deltas += *ref++;
+ /*
+			 * Due to quantizing, it might be possible that the
+ * decoded coefficients are slightly out of range
+ */
+ if (*deltas < 0)
+ *deltas = 0;
+ else if (*deltas > 255)
+ *deltas = 255;
+ deltas++;
+ }
+ ref += stride - 8;
+ }
+}
+
+static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
+ struct fwht_cframe *cf, u32 height, u32 width,
+ unsigned int input_step,
+ bool is_intra, bool next_is_intra)
+{
+ u8 *input_start = input;
+ __be16 *rlco_start = *rlco;
+ s16 deltablock[64];
+ __be16 pframe_bit = htons(PFRAME_BIT);
+ u32 encoding = 0;
+ unsigned int last_size = 0;
+ unsigned int i, j;
+
+ for (j = 0; j < height / 8; j++) {
+ for (i = 0; i < width / 8; i++) {
+ /* intra code, first frame is always intra coded. */
+ int blocktype = IBLOCK;
+ unsigned int size;
+
+ if (!is_intra)
+ blocktype = decide_blocktype(input, refp,
+ deltablock, width, input_step);
+ if (blocktype == IBLOCK) {
+ fwht(input, cf->coeffs, width, input_step, 1);
+ quantize_intra(cf->coeffs, cf->de_coeffs,
+ cf->i_frame_qp);
+ } else {
+ /* inter code */
+ encoding |= FWHT_FRAME_PCODED;
+ fwht16(deltablock, cf->coeffs, 8, 0);
+ quantize_inter(cf->coeffs, cf->de_coeffs,
+ cf->p_frame_qp);
+ }
+ if (!next_is_intra) {
+ ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
+
+ if (blocktype == PBLOCK)
+ add_deltas(cf->de_fwht, refp, 8);
+ fill_decoder_block(refp, cf->de_fwht, 8);
+ }
+
+ input += 8 * input_step;
+ refp += 8 * 8;
+
+ size = rlc(cf->coeffs, *rlco, blocktype);
+ if (last_size == size &&
+ !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
+ __be16 *last_rlco = *rlco - size;
+ s16 hdr = ntohs(*last_rlco);
+
+ if (!((*last_rlco ^ **rlco) & pframe_bit) &&
+ (hdr & DUPS_MASK) < DUPS_MASK)
+ *last_rlco = htons(hdr + 2);
+ else
+ *rlco += size;
+ } else {
+ *rlco += size;
+ }
+ if (*rlco >= rlco_max) {
+ encoding |= FWHT_FRAME_UNENCODED;
+ goto exit_loop;
+ }
+ last_size = size;
+ }
+ input += width * 7 * input_step;
+ }
+
+exit_loop:
+ if (encoding & FWHT_FRAME_UNENCODED) {
+ u8 *out = (u8 *)rlco_start;
+
+ input = input_start;
+ /*
+ * The compressed stream should never contain the magic
+ * header, so when we copy the YUV data we replace 0xff
+ * by 0xfe. Since YUV is limited range such values
+ * shouldn't appear anyway.
+ */
+ for (i = 0; i < height * width; i++, input += input_step)
+ *out++ = (*input == 0xff) ? 0xfe : *input;
+ *rlco = (__be16 *)out;
+ encoding &= ~FWHT_FRAME_PCODED;
+ }
+ return encoding;
+}
+
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+ struct fwht_raw_frame *ref_frm,
+ struct fwht_cframe *cf,
+ bool is_intra, bool next_is_intra)
+{
+ unsigned int size = frm->height * frm->width;
+ __be16 *rlco = cf->rlc_data;
+ __be16 *rlco_max;
+ u32 encoding;
+ u32 chroma_h = frm->height / frm->height_div;
+ u32 chroma_w = frm->width / frm->width_div;
+ unsigned int chroma_size = chroma_h * chroma_w;
+
+ rlco_max = rlco + size / 2 - 256;
+ encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
+ frm->height, frm->width,
+ frm->luma_step, is_intra, next_is_intra);
+ if (encoding & FWHT_FRAME_UNENCODED)
+ encoding |= FWHT_LUMA_UNENCODED;
+ encoding &= ~FWHT_FRAME_UNENCODED;
+ rlco_max = rlco + chroma_size / 2 - 256;
+ encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
+ chroma_h, chroma_w,
+ frm->chroma_step, is_intra, next_is_intra);
+ if (encoding & FWHT_FRAME_UNENCODED)
+ encoding |= FWHT_CB_UNENCODED;
+ encoding &= ~FWHT_FRAME_UNENCODED;
+ rlco_max = rlco + chroma_size / 2 - 256;
+ encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
+ chroma_h, chroma_w,
+ frm->chroma_step, is_intra, next_is_intra);
+ if (encoding & FWHT_FRAME_UNENCODED)
+ encoding |= FWHT_CR_UNENCODED;
+ encoding &= ~FWHT_FRAME_UNENCODED;
+ cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
+ return encoding;
+}
+
+static void decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
+ u32 height, u32 width, bool uncompressed)
+{
+ unsigned int copies = 0;
+ s16 copy[8 * 8];
+ s16 stat;
+ unsigned int i, j;
+
+ if (uncompressed) {
+ memcpy(ref, *rlco, width * height);
+ *rlco += width * height / 2;
+ return;
+ }
+
+ /*
+ * When decoding each macroblock the rlco pointer will be increased
+ * by 65 * 2 bytes worst-case.
+ * To avoid overflow the buffer has to be 65/64th of the actual raw
+ * image size, just in case someone feeds it malicious data.
+ */
+ for (j = 0; j < height / 8; j++) {
+ for (i = 0; i < width / 8; i++) {
+ u8 *refp = ref + j * 8 * width + i * 8;
+
+ if (copies) {
+ memcpy(cf->de_fwht, copy, sizeof(copy));
+ if (stat & PFRAME_BIT)
+ add_deltas(cf->de_fwht, refp, width);
+ fill_decoder_block(refp, cf->de_fwht, width);
+ copies--;
+ continue;
+ }
+
+ stat = derlc(rlco, cf->coeffs);
+
+ if (stat & PFRAME_BIT)
+ dequantize_inter(cf->coeffs);
+ else
+ dequantize_intra(cf->coeffs);
+
+ ifwht(cf->coeffs, cf->de_fwht,
+ (stat & PFRAME_BIT) ? 0 : 1);
+
+ copies = (stat & DUPS_MASK) >> 1;
+ if (copies)
+ memcpy(copy, cf->de_fwht, sizeof(copy));
+ if (stat & PFRAME_BIT)
+ add_deltas(cf->de_fwht, refp, width);
+ fill_decoder_block(refp, cf->de_fwht, width);
+ }
+ }
+}
+
+void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+ u32 hdr_flags)
+{
+ const __be16 *rlco = cf->rlc_data;
+ u32 h = cf->height / 2;
+ u32 w = cf->width / 2;
+
+ if (hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT)
+ h *= 2;
+ if (hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH)
+ w *= 2;
+ decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
+ hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED);
+ decode_plane(cf, &rlco, ref->cb, h, w,
+ hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED);
+ decode_plane(cf, &rlco, ref->cr, h, w,
+ hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2016 Tom aan de Wiel
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ */
+
+#ifndef CODEC_FWHT_H
+#define CODEC_FWHT_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+
+/*
+ * The compressed format consists of a fwht_cframe_hdr struct followed by the
+ * compressed frame data. The header contains the size of that data.
+ * Each Y, Cb and Cr plane is compressed separately. If the compressed
+ * size of each plane becomes larger than the uncompressed size, then
+ * that plane is stored uncompressed and the corresponding bit is set
+ * in the flags field of the header.
+ *
+ * Each compressed plane consists of macroblocks and each macroblock
+ * is run-length-encoded. Each macroblock starts with a 16 bit value.
+ * Bit 15 indicates if this is a P-coded macroblock (1) or not (0).
+ * P-coded macroblocks contain a delta against the previous frame.
+ *
+ * Bits 1-12 contain a number. If non-zero, then this same macroblock
+ * repeats that number of times. This results in a high degree of
+ * compression for generated images like colorbars.
+ *
+ * Following this macroblock header the MB coefficients are run-length
+ * encoded: the top 12 bits contain the coefficient, the bottom 4 bits
+ * tell how many times this coefficient occurs. The value 0xf indicates
+ * that the remainder of the macroblock should be filled with zeroes.
+ *
+ * All 16 and 32 bit values are stored in big-endian (network) order.
+ *
+ * Each fwht_cframe_hdr starts with an 8 byte magic header that is
+ * guaranteed not to occur in the compressed frame data. This header
+ * can be used to sync to the next frame.
+ *
+ * This codec uses the Fast Walsh Hadamard Transform. Tom aan de Wiel
+ * developed this as part of a university project, specifically for use
+ * with this driver. His project report can be found here:
+ *
+ * https://hverkuil.home.xs4all.nl/fwht.pdf
+ */
+
+/*
+ * This is a sequence of 8 bytes with the low 4 bits set to 0xf.
+ *
+ * This sequence cannot occur in the encoded data
+ *
+ * Note that these two magic values are byte-order symmetric, so there
+ * are no endianness issues here.
+ */
+#define FWHT_MAGIC1 0x4f4f4f4f
+#define FWHT_MAGIC2 0xffffffff
+
+#define FWHT_VERSION 1
+
+/* Set if this is an interlaced format */
+#define FWHT_FL_IS_INTERLACED BIT(0)
+/* Set if this is a bottom-first (NTSC) interlaced format */
+#define FWHT_FL_IS_BOTTOM_FIRST BIT(1)
+/* Set if each 'frame' contains just one field */
+#define FWHT_FL_IS_ALTERNATE BIT(2)
+/*
+ * If FWHT_FL_IS_ALTERNATE was set, then this is set if this
+ * 'frame' is the bottom field, else it is the top field.
+ */
+#define FWHT_FL_IS_BOTTOM_FIELD BIT(3)
+/* Set if this frame is uncompressed */
+#define FWHT_FL_LUMA_IS_UNCOMPRESSED BIT(4)
+#define FWHT_FL_CB_IS_UNCOMPRESSED BIT(5)
+#define FWHT_FL_CR_IS_UNCOMPRESSED BIT(6)
+#define FWHT_FL_CHROMA_FULL_HEIGHT BIT(7)
+#define FWHT_FL_CHROMA_FULL_WIDTH BIT(8)
+
+struct fwht_cframe_hdr {
+ u32 magic1;
+ u32 magic2;
+ __be32 version;
+ __be32 width, height;
+ __be32 flags;
+ __be32 colorspace;
+ __be32 xfer_func;
+ __be32 ycbcr_enc;
+ __be32 quantization;
+ __be32 size;
+};
+
+struct fwht_cframe {
+ unsigned int width, height;
+ u16 i_frame_qp;
+ u16 p_frame_qp;
+ __be16 *rlc_data;
+ s16 coeffs[8 * 8];
+ s16 de_coeffs[8 * 8];
+ s16 de_fwht[8 * 8];
+ u32 size;
+};
+
+struct fwht_raw_frame {
+ unsigned int width, height;
+ unsigned int width_div;
+ unsigned int height_div;
+ unsigned int luma_step;
+ unsigned int chroma_step;
+ u8 *luma, *cb, *cr;
+};
+
+#define FWHT_FRAME_PCODED BIT(0)
+#define FWHT_FRAME_UNENCODED BIT(1)
+#define FWHT_LUMA_UNENCODED BIT(2)
+#define FWHT_CB_UNENCODED BIT(3)
+#define FWHT_CR_UNENCODED BIT(4)
+
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+ struct fwht_raw_frame *ref_frm,
+ struct fwht_cframe *cf,
+ bool is_intra, bool next_is_intra);
+void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+ u32 hdr_flags);
+
+#endif
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright 2016 Tom aan de Wiel
- * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
- *
- * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
- * R.D. Brown, 1977
- */
-
-#include <linux/string.h>
-#include "vicodec-codec.h"
-
-#define ALL_ZEROS 15
-
-static const uint8_t zigzag[64] = {
- 0,
- 1, 8,
- 2, 9, 16,
- 3, 10, 17, 24,
- 4, 11, 18, 25, 32,
- 5, 12, 19, 26, 33, 40,
- 6, 13, 20, 27, 34, 41, 48,
- 7, 14, 21, 28, 35, 42, 49, 56,
- 15, 22, 29, 36, 43, 50, 57,
- 23, 30, 37, 44, 51, 58,
- 31, 38, 45, 52, 59,
- 39, 46, 53, 60,
- 47, 54, 61,
- 55, 62,
- 63,
-};
-
-
-static int rlc(const s16 *in, __be16 *output, int blocktype)
-{
- s16 block[8 * 8];
- s16 *wp = block;
- int i = 0;
- int x, y;
- int ret = 0;
-
- /* read in block from framebuffer */
- int lastzero_run = 0;
- int to_encode;
-
- for (y = 0; y < 8; y++) {
- for (x = 0; x < 8; x++) {
- *wp = in[x + y * 8];
- wp++;
- }
- }
-
- /* keep track of amount of trailing zeros */
- for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
- lastzero_run++;
-
- *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
- ret++;
-
- to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
-
- i = 0;
- while (i < to_encode) {
- int cnt = 0;
- int tmp;
-
- /* count leading zeros */
- while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
- cnt++;
- i++;
- if (i == to_encode) {
- cnt--;
- break;
- }
- }
- /* 4 bits for run, 12 for coefficient (quantization by 4) */
- *output++ = htons((cnt | tmp << 4));
- i++;
- ret++;
- }
- if (lastzero_run > 14) {
- *output = htons(ALL_ZEROS | 0);
- ret++;
- }
-
- return ret;
-}
-
-/*
- * This function will worst-case increase rlc_in by 65*2 bytes:
- * one s16 value for the header and 8 * 8 coefficients of type s16.
- */
-static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
-{
- /* header */
- const __be16 *input = *rlc_in;
- s16 ret = ntohs(*input++);
- int dec_count = 0;
- s16 block[8 * 8 + 16];
- s16 *wp = block;
- int i;
-
- /*
- * Now de-compress, it expands one byte to up to 15 bytes
- * (or fills the remainder of the 64 bytes with zeroes if it
- * is the last byte to expand).
- *
- * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
- * allow for overflow if the incoming data was malformed.
- */
- while (dec_count < 8 * 8) {
- s16 in = ntohs(*input++);
- int length = in & 0xf;
- int coeff = in >> 4;
-
- /* fill remainder with zeros */
- if (length == 15) {
- for (i = 0; i < 64 - dec_count; i++)
- *wp++ = 0;
- break;
- }
-
- for (i = 0; i < length; i++)
- *wp++ = 0;
- *wp++ = coeff;
- dec_count += length + 1;
- }
-
- wp = block;
-
- for (i = 0; i < 64; i++) {
- int pos = zigzag[i];
- int y = pos / 8;
- int x = pos % 8;
-
- dwht_out[x + y * 8] = *wp++;
- }
- *rlc_in = input;
- return ret;
-}
-
-static const int quant_table[] = {
- 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 3,
- 2, 2, 2, 2, 2, 2, 3, 6,
- 2, 2, 2, 2, 2, 3, 6, 6,
- 2, 2, 2, 2, 3, 6, 6, 6,
- 2, 2, 2, 3, 6, 6, 6, 6,
- 2, 2, 3, 6, 6, 6, 6, 8,
-};
-
-static const int quant_table_p[] = {
- 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 6,
- 3, 3, 3, 3, 3, 3, 6, 6,
- 3, 3, 3, 3, 3, 6, 6, 9,
- 3, 3, 3, 3, 6, 6, 9, 9,
- 3, 3, 3, 6, 6, 9, 9, 10,
-};
-
-static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
-{
- const int *quant = quant_table;
- int i, j;
-
- for (j = 0; j < 8; j++) {
- for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
- *coeff >>= *quant;
- if (*coeff >= -qp && *coeff <= qp)
- *coeff = *de_coeff = 0;
- else
- *de_coeff = *coeff << *quant;
- }
- }
-}
-
-static void dequantize_intra(s16 *coeff)
-{
- const int *quant = quant_table;
- int i, j;
-
- for (j = 0; j < 8; j++)
- for (i = 0; i < 8; i++, quant++, coeff++)
- *coeff <<= *quant;
-}
-
-static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
-{
- const int *quant = quant_table_p;
- int i, j;
-
- for (j = 0; j < 8; j++) {
- for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
- *coeff >>= *quant;
- if (*coeff >= -qp && *coeff <= qp)
- *coeff = *de_coeff = 0;
- else
- *de_coeff = *coeff << *quant;
- }
- }
-}
-
-static void dequantize_inter(s16 *coeff)
-{
- const int *quant = quant_table_p;
- int i, j;
-
- for (j = 0; j < 8; j++)
- for (i = 0; i < 8; i++, quant++, coeff++)
- *coeff <<= *quant;
-}
-
-static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
- unsigned int input_step, bool intra)
-{
- /* we'll need more than 8 bits for the transformed coefficients */
- s32 workspace1[8], workspace2[8];
- const u8 *tmp = block;
- s16 *out = output_block;
- int add = intra ? 256 : 0;
- unsigned int i;
-
- /* stage 1 */
- stride *= input_step;
-
- for (i = 0; i < 8; i++, tmp += stride, out += 8) {
- switch (input_step) {
- case 1:
- workspace1[0] = tmp[0] + tmp[1] - add;
- workspace1[1] = tmp[0] - tmp[1];
-
- workspace1[2] = tmp[2] + tmp[3] - add;
- workspace1[3] = tmp[2] - tmp[3];
-
- workspace1[4] = tmp[4] + tmp[5] - add;
- workspace1[5] = tmp[4] - tmp[5];
-
- workspace1[6] = tmp[6] + tmp[7] - add;
- workspace1[7] = tmp[6] - tmp[7];
- break;
- case 2:
- workspace1[0] = tmp[0] + tmp[2] - add;
- workspace1[1] = tmp[0] - tmp[2];
-
- workspace1[2] = tmp[4] + tmp[6] - add;
- workspace1[3] = tmp[4] - tmp[6];
-
- workspace1[4] = tmp[8] + tmp[10] - add;
- workspace1[5] = tmp[8] - tmp[10];
-
- workspace1[6] = tmp[12] + tmp[14] - add;
- workspace1[7] = tmp[12] - tmp[14];
- break;
- case 3:
- workspace1[0] = tmp[0] + tmp[3] - add;
- workspace1[1] = tmp[0] - tmp[3];
-
- workspace1[2] = tmp[6] + tmp[9] - add;
- workspace1[3] = tmp[6] - tmp[9];
-
- workspace1[4] = tmp[12] + tmp[15] - add;
- workspace1[5] = tmp[12] - tmp[15];
-
- workspace1[6] = tmp[18] + tmp[21] - add;
- workspace1[7] = tmp[18] - tmp[21];
- break;
- default:
- workspace1[0] = tmp[0] + tmp[4] - add;
- workspace1[1] = tmp[0] - tmp[4];
-
- workspace1[2] = tmp[8] + tmp[12] - add;
- workspace1[3] = tmp[8] - tmp[12];
-
- workspace1[4] = tmp[16] + tmp[20] - add;
- workspace1[5] = tmp[16] - tmp[20];
-
- workspace1[6] = tmp[24] + tmp[28] - add;
- workspace1[7] = tmp[24] - tmp[28];
- break;
- }
-
- /* stage 2 */
- workspace2[0] = workspace1[0] + workspace1[2];
- workspace2[1] = workspace1[0] - workspace1[2];
- workspace2[2] = workspace1[1] - workspace1[3];
- workspace2[3] = workspace1[1] + workspace1[3];
-
- workspace2[4] = workspace1[4] + workspace1[6];
- workspace2[5] = workspace1[4] - workspace1[6];
- workspace2[6] = workspace1[5] - workspace1[7];
- workspace2[7] = workspace1[5] + workspace1[7];
-
- /* stage 3 */
- out[0] = workspace2[0] + workspace2[4];
- out[1] = workspace2[0] - workspace2[4];
- out[2] = workspace2[1] - workspace2[5];
- out[3] = workspace2[1] + workspace2[5];
- out[4] = workspace2[2] + workspace2[6];
- out[5] = workspace2[2] - workspace2[6];
- out[6] = workspace2[3] - workspace2[7];
- out[7] = workspace2[3] + workspace2[7];
- }
-
- out = output_block;
-
- for (i = 0; i < 8; i++, out++) {
- /* stage 1 */
- workspace1[0] = out[0] + out[1 * 8];
- workspace1[1] = out[0] - out[1 * 8];
-
- workspace1[2] = out[2 * 8] + out[3 * 8];
- workspace1[3] = out[2 * 8] - out[3 * 8];
-
- workspace1[4] = out[4 * 8] + out[5 * 8];
- workspace1[5] = out[4 * 8] - out[5 * 8];
-
- workspace1[6] = out[6 * 8] + out[7 * 8];
- workspace1[7] = out[6 * 8] - out[7 * 8];
-
- /* stage 2 */
- workspace2[0] = workspace1[0] + workspace1[2];
- workspace2[1] = workspace1[0] - workspace1[2];
- workspace2[2] = workspace1[1] - workspace1[3];
- workspace2[3] = workspace1[1] + workspace1[3];
-
- workspace2[4] = workspace1[4] + workspace1[6];
- workspace2[5] = workspace1[4] - workspace1[6];
- workspace2[6] = workspace1[5] - workspace1[7];
- workspace2[7] = workspace1[5] + workspace1[7];
- /* stage 3 */
- out[0 * 8] = workspace2[0] + workspace2[4];
- out[1 * 8] = workspace2[0] - workspace2[4];
- out[2 * 8] = workspace2[1] - workspace2[5];
- out[3 * 8] = workspace2[1] + workspace2[5];
- out[4 * 8] = workspace2[2] + workspace2[6];
- out[5 * 8] = workspace2[2] - workspace2[6];
- out[6 * 8] = workspace2[3] - workspace2[7];
- out[7 * 8] = workspace2[3] + workspace2[7];
- }
-}
-
-/*
- * Not the nicest way of doing it, but P-blocks get twice the range of
- * that of the I-blocks. Therefore we need a type bigger than 8 bits.
- * Furthermore values can be negative... This is just a version that
- * works with 16 signed data
- */
-static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
-{
- /* we'll need more than 8 bits for the transformed coefficients */
- s32 workspace1[8], workspace2[8];
- const s16 *tmp = block;
- s16 *out = output_block;
- int i;
-
- for (i = 0; i < 8; i++, tmp += stride, out += 8) {
- /* stage 1 */
- workspace1[0] = tmp[0] + tmp[1];
- workspace1[1] = tmp[0] - tmp[1];
-
- workspace1[2] = tmp[2] + tmp[3];
- workspace1[3] = tmp[2] - tmp[3];
-
- workspace1[4] = tmp[4] + tmp[5];
- workspace1[5] = tmp[4] - tmp[5];
-
- workspace1[6] = tmp[6] + tmp[7];
- workspace1[7] = tmp[6] - tmp[7];
-
- /* stage 2 */
- workspace2[0] = workspace1[0] + workspace1[2];
- workspace2[1] = workspace1[0] - workspace1[2];
- workspace2[2] = workspace1[1] - workspace1[3];
- workspace2[3] = workspace1[1] + workspace1[3];
-
- workspace2[4] = workspace1[4] + workspace1[6];
- workspace2[5] = workspace1[4] - workspace1[6];
- workspace2[6] = workspace1[5] - workspace1[7];
- workspace2[7] = workspace1[5] + workspace1[7];
-
- /* stage 3 */
- out[0] = workspace2[0] + workspace2[4];
- out[1] = workspace2[0] - workspace2[4];
- out[2] = workspace2[1] - workspace2[5];
- out[3] = workspace2[1] + workspace2[5];
- out[4] = workspace2[2] + workspace2[6];
- out[5] = workspace2[2] - workspace2[6];
- out[6] = workspace2[3] - workspace2[7];
- out[7] = workspace2[3] + workspace2[7];
- }
-
- out = output_block;
-
- for (i = 0; i < 8; i++, out++) {
- /* stage 1 */
- workspace1[0] = out[0] + out[1*8];
- workspace1[1] = out[0] - out[1*8];
-
- workspace1[2] = out[2*8] + out[3*8];
- workspace1[3] = out[2*8] - out[3*8];
-
- workspace1[4] = out[4*8] + out[5*8];
- workspace1[5] = out[4*8] - out[5*8];
-
- workspace1[6] = out[6*8] + out[7*8];
- workspace1[7] = out[6*8] - out[7*8];
-
- /* stage 2 */
- workspace2[0] = workspace1[0] + workspace1[2];
- workspace2[1] = workspace1[0] - workspace1[2];
- workspace2[2] = workspace1[1] - workspace1[3];
- workspace2[3] = workspace1[1] + workspace1[3];
-
- workspace2[4] = workspace1[4] + workspace1[6];
- workspace2[5] = workspace1[4] - workspace1[6];
- workspace2[6] = workspace1[5] - workspace1[7];
- workspace2[7] = workspace1[5] + workspace1[7];
-
- /* stage 3 */
- out[0*8] = workspace2[0] + workspace2[4];
- out[1*8] = workspace2[0] - workspace2[4];
- out[2*8] = workspace2[1] - workspace2[5];
- out[3*8] = workspace2[1] + workspace2[5];
- out[4*8] = workspace2[2] + workspace2[6];
- out[5*8] = workspace2[2] - workspace2[6];
- out[6*8] = workspace2[3] - workspace2[7];
- out[7*8] = workspace2[3] + workspace2[7];
- }
-}
-
-static void ifwht(const s16 *block, s16 *output_block, int intra)
-{
- /*
- * we'll need more than 8 bits for the transformed coefficients
- * use native unit of cpu
- */
- int workspace1[8], workspace2[8];
- int inter = intra ? 0 : 1;
- const s16 *tmp = block;
- s16 *out = output_block;
- int i;
-
- for (i = 0; i < 8; i++, tmp += 8, out += 8) {
- /* stage 1 */
- workspace1[0] = tmp[0] + tmp[1];
- workspace1[1] = tmp[0] - tmp[1];
-
- workspace1[2] = tmp[2] + tmp[3];
- workspace1[3] = tmp[2] - tmp[3];
-
- workspace1[4] = tmp[4] + tmp[5];
- workspace1[5] = tmp[4] - tmp[5];
-
- workspace1[6] = tmp[6] + tmp[7];
- workspace1[7] = tmp[6] - tmp[7];
-
- /* stage 2 */
- workspace2[0] = workspace1[0] + workspace1[2];
- workspace2[1] = workspace1[0] - workspace1[2];
- workspace2[2] = workspace1[1] - workspace1[3];
- workspace2[3] = workspace1[1] + workspace1[3];
-
- workspace2[4] = workspace1[4] + workspace1[6];
- workspace2[5] = workspace1[4] - workspace1[6];
- workspace2[6] = workspace1[5] - workspace1[7];
- workspace2[7] = workspace1[5] + workspace1[7];
-
- /* stage 3 */
- out[0] = workspace2[0] + workspace2[4];
- out[1] = workspace2[0] - workspace2[4];
- out[2] = workspace2[1] - workspace2[5];
- out[3] = workspace2[1] + workspace2[5];
- out[4] = workspace2[2] + workspace2[6];
- out[5] = workspace2[2] - workspace2[6];
- out[6] = workspace2[3] - workspace2[7];
- out[7] = workspace2[3] + workspace2[7];
- }
-
- out = output_block;
-
- for (i = 0; i < 8; i++, out++) {
- /* stage 1 */
- workspace1[0] = out[0] + out[1 * 8];
- workspace1[1] = out[0] - out[1 * 8];
-
- workspace1[2] = out[2 * 8] + out[3 * 8];
- workspace1[3] = out[2 * 8] - out[3 * 8];
-
- workspace1[4] = out[4 * 8] + out[5 * 8];
- workspace1[5] = out[4 * 8] - out[5 * 8];
-
- workspace1[6] = out[6 * 8] + out[7 * 8];
- workspace1[7] = out[6 * 8] - out[7 * 8];
-
- /* stage 2 */
- workspace2[0] = workspace1[0] + workspace1[2];
- workspace2[1] = workspace1[0] - workspace1[2];
- workspace2[2] = workspace1[1] - workspace1[3];
- workspace2[3] = workspace1[1] + workspace1[3];
-
- workspace2[4] = workspace1[4] + workspace1[6];
- workspace2[5] = workspace1[4] - workspace1[6];
- workspace2[6] = workspace1[5] - workspace1[7];
- workspace2[7] = workspace1[5] + workspace1[7];
-
- /* stage 3 */
- if (inter) {
- int d;
-
- out[0 * 8] = workspace2[0] + workspace2[4];
- out[1 * 8] = workspace2[0] - workspace2[4];
- out[2 * 8] = workspace2[1] - workspace2[5];
- out[3 * 8] = workspace2[1] + workspace2[5];
- out[4 * 8] = workspace2[2] + workspace2[6];
- out[5 * 8] = workspace2[2] - workspace2[6];
- out[6 * 8] = workspace2[3] - workspace2[7];
- out[7 * 8] = workspace2[3] + workspace2[7];
-
- for (d = 0; d < 8; d++)
- out[8 * d] >>= 6;
- } else {
- int d;
-
- out[0 * 8] = workspace2[0] + workspace2[4];
- out[1 * 8] = workspace2[0] - workspace2[4];
- out[2 * 8] = workspace2[1] - workspace2[5];
- out[3 * 8] = workspace2[1] + workspace2[5];
- out[4 * 8] = workspace2[2] + workspace2[6];
- out[5 * 8] = workspace2[2] - workspace2[6];
- out[6 * 8] = workspace2[3] - workspace2[7];
- out[7 * 8] = workspace2[3] + workspace2[7];
-
- for (d = 0; d < 8; d++) {
- out[8 * d] >>= 6;
- out[8 * d] += 128;
- }
- }
- }
-}
-
-static void fill_encoder_block(const u8 *input, s16 *dst,
- unsigned int stride, unsigned int input_step)
-{
- int i, j;
-
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++, input += input_step)
- *dst++ = *input;
- input += (stride - 8) * input_step;
- }
-}
-
-static int var_intra(const s16 *input)
-{
- int32_t mean = 0;
- int32_t ret = 0;
- const s16 *tmp = input;
- int i;
-
- for (i = 0; i < 8 * 8; i++, tmp++)
- mean += *tmp;
- mean /= 64;
- tmp = input;
- for (i = 0; i < 8 * 8; i++, tmp++)
- ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
- return ret;
-}
-
-static int var_inter(const s16 *old, const s16 *new)
-{
- int32_t ret = 0;
- int i;
-
- for (i = 0; i < 8 * 8; i++, old++, new++)
- ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
- return ret;
-}
-
-static int decide_blocktype(const u8 *cur, const u8 *reference,
- s16 *deltablock, unsigned int stride,
- unsigned int input_step)
-{
- s16 tmp[64];
- s16 old[64];
- s16 *work = tmp;
- unsigned int k, l;
- int vari;
- int vard;
-
- fill_encoder_block(cur, tmp, stride, input_step);
- fill_encoder_block(reference, old, 8, 1);
- vari = var_intra(tmp);
-
- for (k = 0; k < 8; k++) {
- for (l = 0; l < 8; l++) {
- *deltablock = *work - *reference;
- deltablock++;
- work++;
- reference++;
- }
- }
- deltablock -= 64;
- vard = var_inter(old, tmp);
- return vari <= vard ? IBLOCK : PBLOCK;
-}
-
-static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
-{
- int i, j;
-
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++)
- *dst++ = *input++;
- dst += stride - 8;
- }
-}
-
-static void add_deltas(s16 *deltas, const u8 *ref, int stride)
-{
- int k, l;
-
- for (k = 0; k < 8; k++) {
- for (l = 0; l < 8; l++) {
- *deltas += *ref++;
- /*
- * Due to quantizing, it might possible that the
- * decoded coefficients are slightly out of range
- */
- if (*deltas < 0)
- *deltas = 0;
- else if (*deltas > 255)
- *deltas = 255;
- deltas++;
- }
- ref += stride - 8;
- }
-}
-
-static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
- struct cframe *cf, u32 height, u32 width,
- unsigned int input_step,
- bool is_intra, bool next_is_intra)
-{
- u8 *input_start = input;
- __be16 *rlco_start = *rlco;
- s16 deltablock[64];
- __be16 pframe_bit = htons(PFRAME_BIT);
- u32 encoding = 0;
- unsigned int last_size = 0;
- unsigned int i, j;
-
- for (j = 0; j < height / 8; j++) {
- for (i = 0; i < width / 8; i++) {
- /* intra code, first frame is always intra coded. */
- int blocktype = IBLOCK;
- unsigned int size;
-
- if (!is_intra)
- blocktype = decide_blocktype(input, refp,
- deltablock, width, input_step);
- if (blocktype == IBLOCK) {
- fwht(input, cf->coeffs, width, input_step, 1);
- quantize_intra(cf->coeffs, cf->de_coeffs,
- cf->i_frame_qp);
- } else {
- /* inter code */
- encoding |= FRAME_PCODED;
- fwht16(deltablock, cf->coeffs, 8, 0);
- quantize_inter(cf->coeffs, cf->de_coeffs,
- cf->p_frame_qp);
- }
- if (!next_is_intra) {
- ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
-
- if (blocktype == PBLOCK)
- add_deltas(cf->de_fwht, refp, 8);
- fill_decoder_block(refp, cf->de_fwht, 8);
- }
-
- input += 8 * input_step;
- refp += 8 * 8;
-
- size = rlc(cf->coeffs, *rlco, blocktype);
- if (last_size == size &&
- !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
- __be16 *last_rlco = *rlco - size;
- s16 hdr = ntohs(*last_rlco);
-
- if (!((*last_rlco ^ **rlco) & pframe_bit) &&
- (hdr & DUPS_MASK) < DUPS_MASK)
- *last_rlco = htons(hdr + 2);
- else
- *rlco += size;
- } else {
- *rlco += size;
- }
- if (*rlco >= rlco_max) {
- encoding |= FRAME_UNENCODED;
- goto exit_loop;
- }
- last_size = size;
- }
- input += width * 7 * input_step;
- }
-
-exit_loop:
- if (encoding & FRAME_UNENCODED) {
- u8 *out = (u8 *)rlco_start;
-
- input = input_start;
- /*
- * The compressed stream should never contain the magic
- * header, so when we copy the YUV data we replace 0xff
- * by 0xfe. Since YUV is limited range such values
- * shouldn't appear anyway.
- */
- for (i = 0; i < height * width; i++, input += input_step)
- *out++ = (*input == 0xff) ? 0xfe : *input;
- *rlco = (__be16 *)out;
- encoding &= ~FRAME_PCODED;
- }
- return encoding;
-}
-
-u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
- struct cframe *cf, bool is_intra, bool next_is_intra)
-{
- unsigned int size = frm->height * frm->width;
- __be16 *rlco = cf->rlc_data;
- __be16 *rlco_max;
- u32 encoding;
- u32 chroma_h = frm->height / frm->height_div;
- u32 chroma_w = frm->width / frm->width_div;
- unsigned int chroma_size = chroma_h * chroma_w;
-
- rlco_max = rlco + size / 2 - 256;
- encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
- frm->height, frm->width,
- frm->luma_step, is_intra, next_is_intra);
- if (encoding & FRAME_UNENCODED)
- encoding |= LUMA_UNENCODED;
- encoding &= ~FRAME_UNENCODED;
- rlco_max = rlco + chroma_size / 2 - 256;
- encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
- chroma_h, chroma_w,
- frm->chroma_step, is_intra, next_is_intra);
- if (encoding & FRAME_UNENCODED)
- encoding |= CB_UNENCODED;
- encoding &= ~FRAME_UNENCODED;
- rlco_max = rlco + chroma_size / 2 - 256;
- encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
- chroma_h, chroma_w,
- frm->chroma_step, is_intra, next_is_intra);
- if (encoding & FRAME_UNENCODED)
- encoding |= CR_UNENCODED;
- encoding &= ~FRAME_UNENCODED;
- cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
- return encoding;
-}
-
-static void decode_plane(struct cframe *cf, const __be16 **rlco, u8 *ref,
- u32 height, u32 width, bool uncompressed)
-{
- unsigned int copies = 0;
- s16 copy[8 * 8];
- s16 stat;
- unsigned int i, j;
-
- if (uncompressed) {
- memcpy(ref, *rlco, width * height);
- *rlco += width * height / 2;
- return;
- }
-
- /*
- * When decoding each macroblock the rlco pointer will be increased
- * by 65 * 2 bytes worst-case.
- * To avoid overflow the buffer has to be 65/64th of the actual raw
- * image size, just in case someone feeds it malicious data.
- */
- for (j = 0; j < height / 8; j++) {
- for (i = 0; i < width / 8; i++) {
- u8 *refp = ref + j * 8 * width + i * 8;
-
- if (copies) {
- memcpy(cf->de_fwht, copy, sizeof(copy));
- if (stat & PFRAME_BIT)
- add_deltas(cf->de_fwht, refp, width);
- fill_decoder_block(refp, cf->de_fwht, width);
- copies--;
- continue;
- }
-
- stat = derlc(rlco, cf->coeffs);
-
- if (stat & PFRAME_BIT)
- dequantize_inter(cf->coeffs);
- else
- dequantize_intra(cf->coeffs);
-
- ifwht(cf->coeffs, cf->de_fwht,
- (stat & PFRAME_BIT) ? 0 : 1);
-
- copies = (stat & DUPS_MASK) >> 1;
- if (copies)
- memcpy(copy, cf->de_fwht, sizeof(copy));
- if (stat & PFRAME_BIT)
- add_deltas(cf->de_fwht, refp, width);
- fill_decoder_block(refp, cf->de_fwht, width);
- }
- }
-}
-
-void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags)
-{
- const __be16 *rlco = cf->rlc_data;
- u32 h = cf->height / 2;
- u32 w = cf->width / 2;
-
- if (hdr_flags & VICODEC_FL_CHROMA_FULL_HEIGHT)
- h *= 2;
- if (hdr_flags & VICODEC_FL_CHROMA_FULL_WIDTH)
- w *= 2;
- decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
- hdr_flags & VICODEC_FL_LUMA_IS_UNCOMPRESSED);
- decode_plane(cf, &rlco, ref->cb, h, w,
- hdr_flags & VICODEC_FL_CB_IS_UNCOMPRESSED);
- decode_plane(cf, &rlco, ref->cr, h, w,
- hdr_flags & VICODEC_FL_CR_IS_UNCOMPRESSED);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Copyright 2016 Tom aan de Wiel
- * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- */
-
-#ifndef VICODEC_RLC_H
-#define VICODEC_RLC_H
-
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <asm/byteorder.h>
-
-/*
- * The compressed format consists of a cframe_hdr struct followed by the
- * compressed frame data. The header contains the size of that data.
- * Each Y, Cb and Cr plane is compressed separately. If the compressed
- * size of each plane becomes larger than the uncompressed size, then
- * that plane is stored uncompressed and the corresponding bit is set
- * in the flags field of the header.
- *
- * Each compressed plane consists of macroblocks and each macroblock
- * is run-length-encoded. Each macroblock starts with a 16 bit value.
- * Bit 15 indicates if this is a P-coded macroblock (1) or not (0).
- * P-coded macroblocks contain a delta against the previous frame.
- *
- * Bits 1-12 contain a number. If non-zero, then this same macroblock
- * repeats that number of times. This results in a high degree of
- * compression for generated images like colorbars.
- *
- * Following this macroblock header the MB coefficients are run-length
- * encoded: the top 12 bits contain the coefficient, the bottom 4 bits
- * tell how many times this coefficient occurs. The value 0xf indicates
- * that the remainder of the macroblock should be filled with zeroes.
- *
- * All 16 and 32 bit values are stored in big-endian (network) order.
- *
- * Each cframe_hdr starts with an 8 byte magic header that is
- * guaranteed not to occur in the compressed frame data. This header
- * can be used to sync to the next frame.
- *
- * This codec uses the Fast Walsh Hadamard Transform. Tom aan de Wiel
- * developed this as part of a university project, specifically for use
- * with this driver. His project report can be found here:
- *
- * https://hverkuil.home.xs4all.nl/fwht.pdf
- */
-
-/*
- * Note: bit 0 of the header must always be 0. Otherwise it cannot
- * be guaranteed that the magic 8 byte sequence (see below) can
- * never occur in the rlc output.
- */
-#define PFRAME_BIT (1 << 15)
-#define DUPS_MASK 0x1ffe
-
-/*
- * This is a sequence of 8 bytes with the low 4 bits set to 0xf.
- *
- * This sequence cannot occur in the encoded data
- */
-#define VICODEC_MAGIC1 0x4f4f4f4f
-#define VICODEC_MAGIC2 0xffffffff
-
-#define VICODEC_VERSION 1
-
-#define VICODEC_MAX_WIDTH 3840
-#define VICODEC_MAX_HEIGHT 2160
-#define VICODEC_MIN_WIDTH 640
-#define VICODEC_MIN_HEIGHT 480
-
-#define PBLOCK 0
-#define IBLOCK 1
-
-/* Set if this is an interlaced format */
-#define VICODEC_FL_IS_INTERLACED BIT(0)
-/* Set if this is a bottom-first (NTSC) interlaced format */
-#define VICODEC_FL_IS_BOTTOM_FIRST BIT(1)
-/* Set if each 'frame' contains just one field */
-#define VICODEC_FL_IS_ALTERNATE BIT(2)
-/*
- * If VICODEC_FL_IS_ALTERNATE was set, then this is set if this
- * 'frame' is the bottom field, else it is the top field.
- */
-#define VICODEC_FL_IS_BOTTOM_FIELD BIT(3)
-/* Set if this frame is uncompressed */
-#define VICODEC_FL_LUMA_IS_UNCOMPRESSED BIT(4)
-#define VICODEC_FL_CB_IS_UNCOMPRESSED BIT(5)
-#define VICODEC_FL_CR_IS_UNCOMPRESSED BIT(6)
-#define VICODEC_FL_CHROMA_FULL_HEIGHT BIT(7)
-#define VICODEC_FL_CHROMA_FULL_WIDTH BIT(8)
-
-struct cframe_hdr {
- u32 magic1;
- u32 magic2;
- __be32 version;
- __be32 width, height;
- __be32 flags;
- __be32 colorspace;
- __be32 xfer_func;
- __be32 ycbcr_enc;
- __be32 quantization;
- __be32 size;
-};
-
-struct cframe {
- unsigned int width, height;
- u16 i_frame_qp;
- u16 p_frame_qp;
- __be16 *rlc_data;
- s16 coeffs[8 * 8];
- s16 de_coeffs[8 * 8];
- s16 de_fwht[8 * 8];
- u32 size;
-};
-
-struct raw_frame {
- unsigned int width, height;
- unsigned int width_div;
- unsigned int height_div;
- unsigned int luma_step;
- unsigned int chroma_step;
- u8 *luma, *cb, *cr;
-};
-
-#define FRAME_PCODED BIT(0)
-#define FRAME_UNENCODED BIT(1)
-#define LUMA_UNENCODED BIT(2)
-#define CB_UNENCODED BIT(3)
-#define CR_UNENCODED BIT(4)
-
-u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
- struct cframe *cf, bool is_intra, bool next_is_intra);
-void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags);
-
-#endif
#include <media/v4l2-event.h>
#include <media/videobuf2-vmalloc.h>
-#include "vicodec-codec.h"
+#include "codec-fwht.h"
MODULE_DESCRIPTION("Virtual codec device");
MODULE_AUTHOR("Hans Verkuil <hans.verkuil@cisco.com>");
/* Source and destination queue data */
struct vicodec_q_data q_data[2];
- struct raw_frame ref_frame;
+ struct fwht_raw_frame ref_frame;
u8 *compressed_frame;
u32 cur_buf_offset;
u32 comp_max_size;
{
unsigned int size = q_data->width * q_data->height;
const struct pixfmt_info *info = q_data->info;
- struct cframe_hdr *p_hdr;
- struct cframe cf;
- struct raw_frame rf;
+ struct fwht_cframe_hdr *p_hdr;
+ struct fwht_cframe cf;
+ struct fwht_raw_frame rf;
u32 encoding;
rf.width = q_data->width;
cf.p_frame_qp = ctx->p_frame_qp;
cf.rlc_data = (__be16 *)(p_out + sizeof(*p_hdr));
- encoding = encode_frame(&rf, &ctx->ref_frame, &cf, !ctx->gop_cnt,
- ctx->gop_cnt == ctx->gop_size - 1);
- if (!(encoding & FRAME_PCODED))
+ encoding = fwht_encode_frame(&rf, &ctx->ref_frame, &cf, !ctx->gop_cnt,
+ ctx->gop_cnt == ctx->gop_size - 1);
+ if (!(encoding & FWHT_FRAME_PCODED))
ctx->gop_cnt = 0;
if (++ctx->gop_cnt >= ctx->gop_size)
ctx->gop_cnt = 0;
- p_hdr = (struct cframe_hdr *)p_out;
- p_hdr->magic1 = VICODEC_MAGIC1;
- p_hdr->magic2 = VICODEC_MAGIC2;
- p_hdr->version = htonl(VICODEC_VERSION);
+ p_hdr = (struct fwht_cframe_hdr *)p_out;
+ p_hdr->magic1 = FWHT_MAGIC1;
+ p_hdr->magic2 = FWHT_MAGIC2;
+ p_hdr->version = htonl(FWHT_VERSION);
p_hdr->width = htonl(cf.width);
p_hdr->height = htonl(cf.height);
- if (encoding & LUMA_UNENCODED)
- flags |= VICODEC_FL_LUMA_IS_UNCOMPRESSED;
- if (encoding & CB_UNENCODED)
- flags |= VICODEC_FL_CB_IS_UNCOMPRESSED;
- if (encoding & CR_UNENCODED)
- flags |= VICODEC_FL_CR_IS_UNCOMPRESSED;
+ if (encoding & FWHT_LUMA_UNENCODED)
+ flags |= FWHT_FL_LUMA_IS_UNCOMPRESSED;
+ if (encoding & FWHT_CB_UNENCODED)
+ flags |= FWHT_FL_CB_IS_UNCOMPRESSED;
+ if (encoding & FWHT_CR_UNENCODED)
+ flags |= FWHT_FL_CR_IS_UNCOMPRESSED;
if (rf.height_div == 1)
- flags |= VICODEC_FL_CHROMA_FULL_HEIGHT;
+ flags |= FWHT_FL_CHROMA_FULL_HEIGHT;
if (rf.width_div == 1)
- flags |= VICODEC_FL_CHROMA_FULL_WIDTH;
+ flags |= FWHT_FL_CHROMA_FULL_WIDTH;
p_hdr->flags = htonl(flags);
p_hdr->colorspace = htonl(ctx->colorspace);
p_hdr->xfer_func = htonl(ctx->xfer_func);
unsigned int chroma_size = size;
unsigned int i;
u32 flags;
- struct cframe_hdr *p_hdr;
- struct cframe cf;
+ struct fwht_cframe_hdr *p_hdr;
+ struct fwht_cframe cf;
u8 *p;
- p_hdr = (struct cframe_hdr *)p_in;
+ p_hdr = (struct fwht_cframe_hdr *)p_in;
cf.width = ntohl(p_hdr->width);
cf.height = ntohl(p_hdr->height);
flags = ntohl(p_hdr->flags);
ctx->quantization = ntohl(p_hdr->quantization);
cf.rlc_data = (__be16 *)(p_in + sizeof(*p_hdr));
- if (p_hdr->magic1 != VICODEC_MAGIC1 ||
- p_hdr->magic2 != VICODEC_MAGIC2 ||
- ntohl(p_hdr->version) != VICODEC_VERSION ||
- cf.width < VICODEC_MIN_WIDTH ||
- cf.width > VICODEC_MAX_WIDTH ||
- cf.height < VICODEC_MIN_HEIGHT ||
- cf.height > VICODEC_MAX_HEIGHT ||
+ if (p_hdr->magic1 != FWHT_MAGIC1 ||
+ p_hdr->magic2 != FWHT_MAGIC2 ||
+ ntohl(p_hdr->version) != FWHT_VERSION ||
+ cf.width < MIN_WIDTH ||
+ cf.width > MAX_WIDTH ||
+ cf.height < MIN_HEIGHT ||
+ cf.height > MAX_HEIGHT ||
(cf.width & 7) || (cf.height & 7))
return -EINVAL;
if (cf.width != q_data->width || cf.height != q_data->height)
return -EINVAL;
- if (!(flags & VICODEC_FL_CHROMA_FULL_WIDTH))
+ if (!(flags & FWHT_FL_CHROMA_FULL_WIDTH))
chroma_size /= 2;
- if (!(flags & VICODEC_FL_CHROMA_FULL_HEIGHT))
+ if (!(flags & FWHT_FL_CHROMA_FULL_HEIGHT))
chroma_size /= 2;
- decode_frame(&cf, &ctx->ref_frame, flags);
+ fwht_decode_frame(&cf, &ctx->ref_frame, flags);
switch (q_data->info->id) {
case V4L2_PIX_FMT_YUV420:
}
if (ctx->is_enc) {
- struct cframe_hdr *p_hdr = (struct cframe_hdr *)p_out;
+ struct fwht_cframe_hdr *p_hdr = (struct fwht_cframe_hdr *)p_out;
encode(ctx, q_out, p_in, p_out, 0);
vb2_set_plane_payload(&out_vb->vb2_buf, 0,
}
ctx->comp_size = sizeof(magic);
}
- if (ctx->comp_size < sizeof(struct cframe_hdr)) {
- struct cframe_hdr *p_hdr = (struct cframe_hdr *)ctx->compressed_frame;
- u32 copy = sizeof(struct cframe_hdr) - ctx->comp_size;
+ if (ctx->comp_size < sizeof(struct fwht_cframe_hdr)) {
+ struct fwht_cframe_hdr *p_hdr =
+ (struct fwht_cframe_hdr *)ctx->compressed_frame;
+ u32 copy = sizeof(struct fwht_cframe_hdr) - ctx->comp_size;
if (copy > p_out + sz - p)
copy = p_out + sz - p;
p, copy);
p += copy;
ctx->comp_size += copy;
- if (ctx->comp_size < sizeof(struct cframe_hdr)) {
+ if (ctx->comp_size < sizeof(struct fwht_cframe_hdr)) {
job_remove_out_buf(ctx, state);
goto restart;
}
ctx->cur_buf_offset = p - p_out;
ctx->comp_has_frame = true;
ctx->comp_has_next_frame = false;
- if (sz - ctx->cur_buf_offset >= sizeof(struct cframe_hdr)) {
- struct cframe_hdr *p_hdr = (struct cframe_hdr *)p;
+ if (sz - ctx->cur_buf_offset >= sizeof(struct fwht_cframe_hdr)) {
+ struct fwht_cframe_hdr *p_hdr = (struct fwht_cframe_hdr *)p;
u32 frame_size = ntohl(p_hdr->size);
u32 remaining = sz - ctx->cur_buf_offset - sizeof(*p_hdr);
pix->sizeimage = pix->width * pix->height *
info->sizeimage_mult / info->sizeimage_div;
if (pix->pixelformat == V4L2_PIX_FMT_FWHT)
- pix->sizeimage += sizeof(struct cframe_hdr);
+ pix->sizeimage += sizeof(struct fwht_cframe_hdr);
break;
case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
plane->sizeimage = pix_mp->width * pix_mp->height *
info->sizeimage_mult / info->sizeimage_div;
if (pix_mp->pixelformat == V4L2_PIX_FMT_FWHT)
- plane->sizeimage += sizeof(struct cframe_hdr);
+ plane->sizeimage += sizeof(struct fwht_cframe_hdr);
memset(pix_mp->reserved, 0, sizeof(pix_mp->reserved));
memset(plane->reserved, 0, sizeof(plane->reserved));
break;
ctx->ref_frame.width = ctx->ref_frame.height = 0;
ctx->ref_frame.luma = kvmalloc(size + 2 * size / chroma_div, GFP_KERNEL);
ctx->comp_max_size = size + 2 * size / chroma_div +
- sizeof(struct cframe_hdr);
+ sizeof(struct fwht_cframe_hdr);
ctx->compressed_frame = kvmalloc(ctx->comp_max_size, GFP_KERNEL);
if (!ctx->ref_frame.luma || !ctx->compressed_frame) {
kvfree(ctx->ref_frame.luma);
ctx->q_data[V4L2_M2M_DST].sizeimage = size;
ctx->colorspace = V4L2_COLORSPACE_REC709;
- size += sizeof(struct cframe_hdr);
+ size += sizeof(struct fwht_cframe_hdr);
if (ctx->is_enc) {
ctx->q_data[V4L2_M2M_DST].sizeimage = size;
ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->enc_dev, ctx,