From d191c3d0c409e150d7234a52715718dbe05c1bce Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 28 Sep 2023 23:51:28 +0200 Subject: [PATCH] micropython-lib: move to PCRE2 Add pending patch converting the package to PCRE2. Signed-off-by: Christian Marangi --- lang/python/micropython-lib/Makefile | 4 +- .../004-unix-ffi-re-convert-to-PCRE2.patch | 148 ++++++++++++++++++ 2 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch diff --git a/lang/python/micropython-lib/Makefile b/lang/python/micropython-lib/Makefile index 7bd167898a..490e6e2e7d 100644 --- a/lang/python/micropython-lib/Makefile +++ b/lang/python/micropython-lib/Makefile @@ -8,7 +8,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=micropython-lib -PKG_RELEASE:=1 +PKG_RELEASE:=2 PKG_SOURCE_PROTO:=git PKG_SOURCE_URL:=https://github.com/micropython/micropython-lib.git @@ -50,7 +50,7 @@ endef define Package/micropython-lib-unix $(call Package/micropython-lib/Default) TITLE+= - Unix port packages - DEPENDS:=+micropython +libpcre +librt +libsqlite3 + DEPENDS:=+micropython +libpcre2 +librt +libsqlite3 endef define Package/micropython-lib-unix-src diff --git a/lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch b/lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch new file mode 100644 index 0000000000..6bf539b144 --- /dev/null +++ b/lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch @@ -0,0 +1,148 @@ +From 1cbe8c4dd653336c5766dfd75eb379ad37f04249 Mon Sep 17 00:00:00 2001 +From: Christian Marangi +Date: Thu, 28 Sep 2023 20:59:26 +0200 +Subject: [PATCH] unix-ffi: re: convert to PCRE2 + +PCRE is marked as EOL and won't receive any new security update. + +Convert the re module to PCRE2 API to enforce security. +Additional dependency is now needed with uctypes due to changes in how +PCRE2 return the match_data in a pointer and require special handling. + +The converted module is tested with the test_re.py with no regression. + +Signed-off-by: Christian Marangi +--- + unix-ffi/re/re.py | 73 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 48 insertions(+), 25 deletions(-) + +--- a/unix-ffi/re/re.py ++++ b/unix-ffi/re/re.py +@@ -1,36 +1,55 @@ + import sys + import ffilib + import array ++import uctypes + ++pcre2 = ffilib.open("libpcre2-8") + +-pcre = ffilib.open("libpcre") +- +-# pcre *pcre_compile(const char *pattern, int options, +-# const char **errptr, int *erroffset, +-# const unsigned char *tableptr); +-pcre_compile = pcre.func("p", "pcre_compile", "sipps") +- +-# int pcre_exec(const pcre *code, const pcre_extra *extra, +-# const char *subject, int length, int startoffset, +-# int options, int *ovector, int ovecsize); +-pcre_exec = pcre.func("i", "pcre_exec", "PPsiiipi") +- +-# int pcre_fullinfo(const pcre *code, const pcre_extra *extra, +-# int what, void *where); +-pcre_fullinfo = pcre.func("i", "pcre_fullinfo", "PPip") +- +- +-IGNORECASE = I = 1 +-MULTILINE = M = 2 +-DOTALL = S = 4 +-VERBOSE = X = 8 +-PCRE_ANCHORED = 0x10 ++# pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, ++# uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, ++# pcre2_compile_context *ccontext); ++pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp") ++ ++# int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, ++# PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, ++# pcre2_match_data *match_data, pcre2_match_context *mcontext); ++pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp") ++ ++# int pcre2_pattern_info(const pcre2_code *code, uint32_t what, ++# void *where); ++pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip") ++ ++# PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); ++pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p") ++ ++# pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code, ++# pcre2_general_context *gcontext); ++pcre2_match_data_create_from_pattern = pcre2.func( ++ "p", "pcre2_match_data_create_from_pattern_8", "Pp" ++) ++ ++# PCRE2_SIZE that is of type size_t. ++# Use ULONG as type to support both 32bit and 64bit. ++PCRE2_SIZE_SIZE = uctypes.sizeof({"field": 0 | uctypes.ULONG}) ++PCRE2_SIZE_TYPE = "L" ++ ++# Real value in pcre2.h is 0xFFFFFFFF for 32bit and ++# 0x0xFFFFFFFFFFFFFFFF for 64bit that is equivalent ++# to -1 ++PCRE2_ZERO_TERMINATED = -1 ++ ++ ++IGNORECASE = I = 0x8 ++MULTILINE = M = 0x400 ++DOTALL = S = 0x20 ++VERBOSE = X = 0x80 ++PCRE2_ANCHORED = 0x80000000 + + # TODO. Note that Python3 has unicode by default + ASCII = A = 0 + UNICODE = U = 0 + +-PCRE_INFO_CAPTURECOUNT = 2 ++PCRE2_INFO_CAPTURECOUNT = 0x4 + + + class PCREMatch: +@@ -67,19 +86,23 @@ class PCREPattern: + def search(self, s, pos=0, endpos=-1, _flags=0): + assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos) + buf = array.array("i", [0]) +- pcre_fullinfo(self.obj, None, PCRE_INFO_CAPTURECOUNT, buf) ++ pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf) + cap_count = buf[0] +- ov = array.array("i", [0, 0, 0] * (cap_count + 1)) +- num = pcre_exec(self.obj, None, s, len(s), pos, _flags, ov, len(ov)) ++ match_data = pcre2_match_data_create_from_pattern(self.obj, None) ++ num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None) + if num == -1: + # No match + return None ++ ov_ptr = pcre2_get_ovector_pointer(match_data) ++ # pcre2_get_ovector_pointer return PCRE2_SIZE ++ ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2) ++ ov = array.array(PCRE2_SIZE_TYPE, ov_buf) + # We don't care how many matching subexpressions we got, we + # care only about total # of capturing ones (including empty) + return PCREMatch(s, cap_count + 1, ov) + + def match(self, s, pos=0, endpos=-1): +- return self.search(s, pos, endpos, PCRE_ANCHORED) ++ return self.search(s, pos, endpos, PCRE2_ANCHORED) + + def sub(self, repl, s, count=0): + if not callable(repl): +@@ -141,9 +164,9 @@ class PCREPattern: + + + def compile(pattern, flags=0): +- errptr = bytes(4) ++ errcode = bytes(4) + erroffset = bytes(4) +- regex = pcre_compile(pattern, flags, errptr, erroffset, None) ++ regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None) + assert regex + return PCREPattern(regex) + +@@ -154,7 +177,7 @@ def search(pattern, string, flags=0): + + + def match(pattern, string, flags=0): +- r = compile(pattern, flags | PCRE_ANCHORED) ++ r = compile(pattern, flags | PCRE2_ANCHORED) + return r.search(string) + + -- 2.30.2