allow collect.py script to scrape sites like downloads.openwrt.org
authorMoritz Warning <moritzwarning@web.de>
Fri, 31 Jul 2020 12:55:08 +0000 (14:55 +0200)
committerMoritz Warning <moritzwarning@web.de>
Fri, 31 Jul 2020 12:56:16 +0000 (14:56 +0200)
README.md
misc/collect.py

index b361967c8ef6bcb2fa36697a5dc44b24932998eb..0cd415196e67ab9eca9efd582d8388a49801d8fb 100644 (file)
--- a/README.md
+++ b/README.md
@@ -24,10 +24,15 @@ The `overview.json` files are based on JSON files created by OpenWrt
 (master): `Global build settings  ---> [*] Create JSON info files per build
 image`.
 
-A [Python script](misc/collect.py) is included to merge the JSON files:
-`./collect.py bin/ --download-url
-'https://downloads.openwrt.org/releases/{version}/targets/{target}' >
-overview.json`.
+A [Python script](misc/collect.py) is included to merge the JSON files into a single overview.json:
+```
+./collect.py merge bin/ --download-url 'https://downloads.openwrt.org/releases/{version}/targets/{target}' > overview.json
+```
+
+If you want to scrape the OpenWrt download website and update the config.js automatically:
+```
+./collect.py scrape https://downloads.openwrt.org /var/www/firmware_selector
+```
 
 For the OpenWrt 18.06 and 19.07 releases, you need to patch OpenWrt to output JSON files for collect.py (commit [openwrt/openwrt@881ed09](https://github.com/openwrt/openwrt/commit/881ed09ee6e23f6c224184bb7493253c4624fb9f)).
 
index b6af771540712fc3c57433712ea15cf4cf46ee2f..60116704ff1e4331ec540a50f4d9d74b5d6d617d 100755 (executable)
 #!/usr/bin/env python3
 
 from pathlib import Path
+import urllib.request
 import argparse
+import shutil
 import json
 import sys
 import os
+import re
+
 
 parser = argparse.ArgumentParser()
-parser.add_argument("input_path", nargs="+",
-  help="Input folder that is traversed for OpenWrt JSON device files.")
-parser.add_argument("--download-url", action="store", default="",
-  help="Link to get the image from. May contain {target}, {version} and {commit}")
 parser.add_argument("--formatted", action="store_true",
   help="Output formatted JSON data.")
-parser.add_argument("--change-prefix",
-  help="Change the openwrt- file name prefix.")
+subparsers = parser.add_subparsers(dest='action', required=True)
+
+parser_merge = subparsers.add_parser('merge',
+  help='Create a grid structure with horizontal and vertical connections.')
+parser_merge.add_argument("input_path", nargs="+",
+  help="Input folder that is traversed for OpenWrt JSON device files.")
+parser_merge.add_argument("--download-url", action="store", default="",
+  help="Link to get the image from. May contain {target}, {version} and {commit}")
+#parser_merge.add_argument("--change-prefix",
+#  help="Change the openwrt- file name prefix.")
+
+parser_scrape = subparsers.add_parser('scrape',
+  help='Create a grid structure of horizontal, vertical and vertical connections.')
+parser_scrape.add_argument('domain',
+  help='Domain to scrape. E.g. https://downloads.openwrt.org')
+parser_scrape.add_argument('selector',
+  help='Path the config.js file is in.')
 
 args = parser.parse_args()
 
 SUPPORTED_METADATA_VERSION = 1
 
+# Merge OpenWrt profile JSON files into one overview structure.
+# accepts {<file-path>: <file-content>}
+# returns {"version_code": ..., "download_url": ..., "models": {<title>: {...}}},
+# or {} when no valid profile was found
+def merge_profiles(profiles, download_url):
+  # json output data
+  output = {}
+
+  # human readable device name; falls back to "<vendor> <model> <variant>"
+  def get_title_name(title):
+    if "title" in title:
+      return title["title"]
+    else:
+      return "{} {} {}".format(title.get("vendor", ""), title["model"], title.get("variant", "")).strip()
+
+  def add_profile(id, target, profile, code=None):
+    images = []
+    for image in profile["images"]:
+      images.append({"name": image["name"], "type": image["type"]})
+
+    if target is None:
+      target = profile["target"]
+
+    #if args.change_prefix:
+    #    change_prefix(images, "openwrt-", args.change_prefix)
+
+    for title in profile["titles"]:
+      name = get_title_name(title)
+
+      if len(name) == 0:
+        # note: 'path' is the current file of the enclosing merge loop
+        sys.stderr.write(f"Empty title. Skip title in {path}\n")
+        continue
+
+      output["models"][name] = {"id": id, "target": target, "images": images}
+
+      if code is not None:
+        output["models"][name]["code"] = code
+
+  for path, content in profiles.items():
+    # parse here, inside a try, so a malformed file is skipped instead of
+    # aborting the whole merge (the except clause was previously unreachable
+    # because json.loads ran outside of it)
+    try:
+      obj = json.loads(content)
+    except json.decoder.JSONDecodeError as e:
+      sys.stderr.write(f"Skip {path}\n   {e}\n")
+      continue
+
+    if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
+      sys.stderr.write(f"{path} has unsupported metadata version: {obj['metadata_version']} => skip\n")
+      continue
+
+    code = obj.get("version_code", obj.get("version_commit"))
+
+    if not "version_code" in output:
+      output = {
+        "version_code": code,
+        "download_url": download_url,
+        "models" : {}
+      }
+
+    # if we have mixed codes/commits, store in device object
+    if output["version_code"] == code:
+      code = None
+
+    try:
+      if "profiles" in obj:
+        for id in obj["profiles"]:
+          add_profile(id, obj.get("target"), obj["profiles"][id], code)
+      else:
+        add_profile(obj["id"], obj["target"], obj, code)
+    except KeyError as e:
+      sys.stderr.write(f"Abort on {path}\n   Missing key {e}\n")
+      exit(1)
+
+  return output
+
+# Scrape the download server at <url> for release target directories,
+# write one data/<release>/overview.json per release below <selector_path>
+# and update the versions list in <selector_path>/config.js.
+def scrape(url, selector_path):
+  config_path = f"{selector_path}/config.js"
+  data_path = f"{selector_path}/data"
+  versions = {}
+
+  # replace the 'versions: {...}' object in config.js with the scraped releases
+  def update_config(config_path, versions):
+    content = ''
+    with open(config_path, 'r') as file:
+      content = file.read()
+
+    content = re.sub('versions:[\\s]*{[^}]*}', f'versions: {versions}' , content)
+    with open(config_path, 'w+') as file:
+      # save updated config
+      file.write(content)
+
+  # download all profiles.json files of one targets/ directory
+  # returns {<file-url>: <file-content>}
+  def handle_release(target):
+    profiles = {}
+    with urllib.request.urlopen(f"{target}/?json") as file:
+      array = json.loads(file.read().decode('utf-8'))
+      for profile in filter(lambda x: x.endswith('/profiles.json'), array):
+        #print(profile)
+        with urllib.request.urlopen(f"{target}/{profile}") as file:
+          profiles[f"{target}/{profile}"] = file.read()
+    return profiles
+
+  if not os.path.isfile(config_path):
+      print(f"file not found: {config_path}")
+      exit(1)
+
+  shutil.rmtree(data_path, ignore_errors=True)
+
+  # fetch release URLs
+  with urllib.request.urlopen(url) as infile:
+    for path in re.findall(r'href=["\']?([^\'" >]+)', str(infile.read())):
+      if not path.startswith('/') and path.endswith('targets/'):
+        release = path.strip('/').split('/')[-2]
+        # <path> already ends with '/', do not add another one ('//' in URLs)
+        download_url = f"{url}/{path}{{target}}"
+
+        profiles = handle_release(f"{url}/{path}".rstrip('/'))
+        output = merge_profiles(profiles, download_url)
+        if len(output) > 0:
+          Path(f"{data_path}/{release}").mkdir(parents=True, exist_ok=True)
+          # write overview.json
+          with open(f"{data_path}/{release}/overview.json", 'w') as outfile:
+            if args.formatted:
+              json.dump(output, outfile, indent="  ", sort_keys=True)
+            else:
+              json.dump(output, outfile, sort_keys=True)
+
+          versions[release.upper()] = f"data/{release}/overview.json"
+
+  update_config(config_path, versions)
+
+'''
 def change_prefix(images, old_prefix, new_prefix):
     for image in images:
         if image["name"].startswith(old_prefix):
             image["name"] = new_prefix + image["name"][len(old_prefix):]
-
-# OpenWrt JSON device files
-paths = []
-
-# json output data
-output = {}
-
-for path in args.input_path:
-  if os.path.isdir(path):
-    for file in Path(path).rglob("*.json"):
-      paths.append(file)
+'''
+
+# Collect OpenWrt JSON device files from the given paths (folders are
+# searched recursively for *.json) and print the merged overview to stdout.
+def merge(input_paths):
+  # OpenWrt JSON device files: {<file-path>: <file-content>}
+  profiles = {}
+
+  def add_path(path):
+    with open(path, "r") as file:
+      profiles[path] = file.read()
+
+  for path in input_paths:
+    if os.path.isdir(path):
+      for filepath in Path(path).rglob("*.json"):
+        add_path(filepath)
+    else:
+      # single file arguments must be JSON files
+      if not path.endswith(".json"):
+        sys.stderr.write(f"Not a folder or JSON file: {path}\n")
+        exit(1)
+      add_path(path)
+
+  output = merge_profiles(profiles, args.download_url)
+
+  if args.formatted:
+    json.dump(output, sys.stdout, indent="  ", sort_keys=True)
   else:
-    if not path.endswith(".json"):
-      sys.stderr.write(f"Folder does not exists: {path}\n")
-      exit(1)
-    paths.append(path)
+    json.dump(output, sys.stdout, sort_keys=True)
 
-def get_title_name(title):
-  if "title" in title:
-    return title["title"]
-  else:
-    return "{} {} {}".format(title.get("vendor", ""), title["model"], title.get("variant", "")).strip()
-
-def add_profile(id, target, profile, code=None):
-  images = []
-  for image in profile["images"]:
-      images.append({"name": image["name"], "type": image["type"]})
-
-  if target is None:
-    target = profile["target"]
-
-  if args.change_prefix:
-      change_prefix(images, "openwrt-", args.change_prefix)
-
-  for title in profile["titles"]:
-    name = get_title_name(title)
-
-    if len(name) == 0:
-      sys.stderr.write(f"Empty title. Skip title in {path}\n")
-      continue
-
-    output["models"][name] = {"id": id, "target": target, "images": images}
-
-    if code is not None:
-      output["models"][name]["code"] = code
-
-for path in paths:
-  with open(path, "r") as file:
-    obj = json.load(file)
-
-    if obj["metadata_version"] != SUPPORTED_METADATA_VERSION:
-      sys.stderr.write(f"{path} has unsupported metadata version: {obj["metadata_version"]} => skip\n")
-      continue
-
-    code = obj.get("version_code", obj.get("version_commit"))
-
-    if not "version_code" in output:
-      output = {
-        "version_code": code,
-        "download_url": args.download_url,
-        "models" : {}
-      }
-
-    # if we have mixed codes/commits, store in device object
-    if output["version_code"] == code:
-      code = None;
-
-    try:
-      if "profiles" in obj:
-        for id in obj["profiles"]:
-          add_profile(id, obj.get("target"), obj["profiles"][id], code)
-      else:
-        add_profile(obj["id"], obj["target"], obj, code)
-    except json.decoder.JSONDecodeError as e:
-      sys.stderr.write(f"Skip {path}\n   {e}\n")
-    except KeyError as e:
-      sys.stderr.write(f"Abort on {path}\n   Missing key {e}\n")
-      exit(1)
+# dispatch on the selected sub-command; the actions are mutually exclusive
+if args.action == "merge":
+  merge(args.input_path)
 
-if args.formatted:
-  json.dump(output, sys.stdout, indent="  ", sort_keys=True)
-else:
-  json.dump(output, sys.stdout, sort_keys=True)
+elif args.action == "scrape":
+  scrape(args.domain, args.selector)