From 238acb305b2d702753de5386e129bdd35fec1321 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 17 Jan 2024 19:34:29 +0100 Subject: [PATCH] Initial import Signed-off-by: Felix Fietkau Signed-off-by: John Crispin --- Makefile | 47 +++ README.md | 26 ++ data/android.json | 19 ++ data/apple.json | 226 +++++++++++++++ data/microsoft.json | 10 + data/nest.json | 9 + data/nintendo.json | 10 + data/ring.json | 9 + data/samsung.json | 18 ++ data/sony.json | 10 + files/etc/init.d/ufp | 14 + files/usr/sbin/ufpd | 377 ++++++++++++++++++++++++ files/usr/share/ufp/plugin_dhcp.uc | 120 ++++++++ files/usr/share/ufp/plugin_mdns.uc | 298 +++++++++++++++++++ files/usr/share/ufp/plugin_wifi.uc | 320 +++++++++++++++++++++ scripts/convert-devices.uc | 58 ++++ src/CMakeLists.txt | 36 +++ src/README.md | 81 ++++++ src/ucode.c | 240 ++++++++++++++++ src/uht.c | 442 +++++++++++++++++++++++++++++ src/uht.h | 139 +++++++++ src/xxhash32.c | 146 ++++++++++ src/xxhash32.h | 8 + 23 files changed, 2663 insertions(+) create mode 100644 Makefile create mode 100644 README.md create mode 100644 data/android.json create mode 100644 data/apple.json create mode 100644 data/microsoft.json create mode 100644 data/nest.json create mode 100644 data/nintendo.json create mode 100644 data/ring.json create mode 100644 data/samsung.json create mode 100644 data/sony.json create mode 100755 files/etc/init.d/ufp create mode 100755 files/usr/sbin/ufpd create mode 100644 files/usr/share/ufp/plugin_dhcp.uc create mode 100644 files/usr/share/ufp/plugin_mdns.uc create mode 100644 files/usr/share/ufp/plugin_wifi.uc create mode 100644 scripts/convert-devices.uc create mode 100644 src/CMakeLists.txt create mode 100644 src/README.md create mode 100644 src/ucode.c create mode 100644 src/uht.c create mode 100644 src/uht.h create mode 100644 src/xxhash32.c create mode 100644 src/xxhash32.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a9e35f5 --- /dev/null +++ b/Makefile @@ -0,0 +1,47 @@ +# +# Copyright (C) 2021 OpenWrt.org +# +# This is free software, licensed under the GNU General Public License v2. +# See /LICENSE for more information. +# + +include $(TOPDIR)/rules.mk + +PKG_NAME:=ufp +PKG_VERSION:=1 + +PKG_LICENSE:=GPL-2.0 +PKG_MAINTAINER:=Felix Fietkau + +HOST_BUILD_DEPENDS:=ucode/host libubox/host +PKG_BUILD_DEPENDS:=bpf-headers ufp/host + +include $(INCLUDE_DIR)/host-build.mk +include $(INCLUDE_DIR)/package.mk +include $(INCLUDE_DIR)/cmake.mk + +define Package/ufp + SECTION:=utils + CATEGORY:=Utilities + TITLE:=Device fingerprinting daemon + DEPENDS:=+ucode +ucode-mod-fs +libubox +endef + +define Package/ufp/conffiles +/etc/config/ufp +endef + +define Host/Prepare + mkdir -p $(HOST_BUILD_DIR) + $(CP) ./src/* $(HOST_BUILD_DIR)/ +endef + +define Package/ufp/install + $(INSTALL_DIR) $(1)/usr/lib/ucode $(1)/usr/share/ufp + $(INSTALL_DATA) $(PKG_INSTALL_DIR)/usr/lib/ucode/uht.so $(1)/usr/lib/ucode/ + ucode ./scripts/convert-devices.uc $(1)/usr/share/ufp/devices.bin ./data/*.json + $(CP) ./files/* $(1)/ +endef + +$(eval $(call BuildPackage,ufp)) +$(eval $(call HostBuild)) diff --git a/README.md b/README.md new file mode 100644 index 0000000..04e903f --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# ufp - OpenWrt device fingerprinting + +The daemon uses plugins to gather information about connected devices. Each fingerprint type has a weight. Based on the aggregated weight of all identified data points, the fingerprint is matched against the data base. + +The following plugins currently exist +* DHCP +* WIFI Taxonomy +* MDNS + +## uBus API +* ubus call fingerprint fingerprint - This is the main user facing method, listing the identified devices. +* ubus call fingerprint get_data - This lists the raw data used for internal matching, + + +## Supported data points +* wifi4 - the HT taxonomy +* wifi6 - the VHT taxonomy +* wifi-vendor-oui-XXYYZZ - identified vendor IEs +* dhcp_req - the DHCP discover / request signature +* dhcp_vendorid - the vendor ID value inside DHCP discover / request frames +* mac-oui-XXYYZZ - OUI of the primary MAC + +All MDNS based fingerprinting is done inside the plugin. + +## Setting up the code +The daemon does not need to be configured, it will try to identify any device it sees on wired and wireless interfaces. The latest version of umdnsd and udhcpsnoop are required for the plugins to work. diff --git a/data/android.json b/data/android.json new file mode 100644 index 0000000..7181504 --- /dev/null +++ b/data/android.json @@ -0,0 +1,19 @@ +{ + "os_vendor=Google|os=%": { + "Android 10": [ + "dhcp_vendorid|android-dhcp-10" + ], + "Android 11": [ + "dhcp_vendorid|android-dhcp-11" + ], + "Android 12": [ + "dhcp_vendorid|android-dhcp-12" + ], + "Android 13": [ + "dhcp_vendorid|android-dhcp-13" + ], + "Android 9": [ + "dhcp_vendorid|android-dhcp-9" + ] + } +} diff --git a/data/apple.json b/data/apple.json new file mode 100644 index 0000000..375a00d --- /dev/null +++ b/data/apple.json @@ -0,0 +1,226 @@ +{ + "vendor=%": { + "Apple": [ + "wifi-vendor-oui-0017f2|1", + "mac-oui-f4d488|1" + ] + }, + "vendor=Apple": { + "iOS device": [ + "dhcp_req|1,121,3,6,15,108,114,119,252" + ], + "macOS device": [ + "dhcp_req|1,121,3,6,15,108,114,119,252,95,44,46" + ] + }, + "vendor=Apple|class=laptop": { + "MacBook Air M1": [ + "wifi6|0,1,33,36,48,45,127,191,255(35),221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817832,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:16f9,extcap:0000000000000040,hemac:800000080801,hephy:000c00089f011d800230", + "apple_model|MacBookAir10,1" + ], + "MacBook Air M2": [ + "wifi6|0,1,33,36,48,45,127,191,255(35),221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817832,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040,hemac:800000080801,hephy:000c00089f031d800230", + "apple_model|Mac14,2" + ], + "MacBook Pro M1": [ + "wifi4|0,1,33,36,48,54,45,127,191,221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817832,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040", + "wifi6|0,1,33,36,48,54,45,127,191,255(35),221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817832,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040,hemac:800000080801,hephy:000c00089f031d800230", + "apple_model|MacBookPro17,1", + "apple_model|MacBookPro18,1", + "apple_model|MacBookPro18,2", + "apple_model|MacBookPro18,3", + "apple_model|MacBookPro18,4" + ] + }, + "vendor=Apple|class=pc": { + "MacBook Studio M1": [ + "wifi4|0,1,33,36,48,45,127,191,221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817832,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040", + "wifi6|0,1,33,36,48,45,127,191,255(35),221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817832,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040,hemac:800000080801,hephy:000c00089f031d800230", + "apple_model|Mac13,1" + ] + }, + "vendor=Apple|class=phone": { + "iPhone 13": [ + "apple_model|iPhone14,5" + ], + "iPhone 13 Pro": [ + "apple_model|iPhone14,2" + ], + "iPhone 13 Pro Max": [ + "apple_model|iPhone14,3" + ], + "iPhone 13 mini": [ + "wifi4|0,1,33,36,48,54,45,127,191,221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817032,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040", + "wifi6|0,1,33,36,48,54,45,127,191,255(35),221(0017f20a),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f817032,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:15f9,extcap:0000000000000040,hemac:800000080801,hephy:000c00089f001d000230", + "apple_model|iPhone14,4" + ], + "iPhone 14": [ + "apple_model|iPhone14,7" + ], + "iPhone 14 Plus": [ + "apple_model|iPhone14,8" + ], + "iPhone 14 Pro": [ + "apple_model|iPhone15,2" + ], + "iPhone 14 Pro Max": [ + "apple_model|iPhone15,3" + ] + }, + "vendor=Apple|class=speaker": { + "HomePod": [ + "apple_model|AudioAccessory1,1", + "apple_model|AudioAccessory1,2" + ], + "HomePod mini": [ + "apple_model|AudioAccessory5,1" + ] + }, + "vendor=Apple|class=tablet": { + "iPad": [ + "apple_model|iPad1,1", + "apple_model|iPad2,1", + "apple_model|iPad2,2", + "apple_model|iPad2,4", + "apple_model|iPad3,1", + "apple_model|iPad3,2", + "apple_model|iPad3,3", + "apple_model|iPad3,4", + "apple_model|iPad3,5", + "apple_model|iPad3,6", + "apple_model|iPad6,11", + "apple_model|iPad6,12", + "apple_model|iPad7,5", + "apple_model|iPad7,6", + "apple_model|iPad7,11", + "apple_model|iPad11,6", + "apple_model|iPad11,7", + "apple_model|iPad12,1", + "apple_model|iPad12,2", + "apple_model|iPad13,18", + "apple_model|iPad13,19" + ], + "iPad Air": [ + "apple_model|iPad4,1", + "apple_model|iPad4,2", + "apple_model|iPad4,3", + "apple_model|iPad5,3", + "apple_model|iPad5,4", + "apple_model|iPad11,3", + "apple_model|iPad11,4", + "apple_model|iPad13,1", + "apple_model|iPad13,2", + "apple_model|iPad13,16", + "apple_model|iPad13,17" + ], + "iPad Pro": [ + "apple_model|iPad6,3", + "apple_model|iPad6,4", + "apple_model|iPad6,6", + "apple_model|iPad6,7", + "apple_model|iPad6,8", + "apple_model|iPad7,1", + "apple_model|iPad7,2", + "apple_model|iPad7,3", + "apple_model|iPad7,4", + "apple_model|iPad8,1", + "apple_model|iPad8,2", + "apple_model|iPad8,3", + "apple_model|iPad8,4", + "apple_model|iPad8,5", + "apple_model|iPad8,6", + "apple_model|iPad8,7", + "apple_model|iPad8,8", + "apple_model|iPad8,9", + "apple_model|iPad8,10", + "apple_model|iPad8,11", + "apple_model|iPad8,12", + "apple_model|iPad13,4", + "apple_model|iPad13,5", + "apple_model|iPad13,6", + "apple_model|iPad13,7", + "apple_model|iPad13,8", + "apple_model|iPad13,9", + "apple_model|iPad13,10", + "apple_model|iPad13,11" + ], + "iPad mini": [ + "apple_model|iPad2,5", + "apple_model|iPad2,6", + "apple_model|iPad2,7", + "apple_model|iPad4,4", + "apple_model|iPad4,5", + "apple_model|iPad4,6", + "apple_model|iPad4,7", + "apple_model|iPad4,8", + "apple_model|iPad4,9", + "apple_model|iPad5,1", + "apple_model|iPad5,2", + "apple_model|iPad11,1", + "apple_model|iPad11,2", + "apple_model|iPad14,1", + "apple_model|iPad14,2" + ] + }, + "vendor=Apple|class=tv": { + "Apple TV": [ + "apple_model|AppleTV1,1", + "apple_model|AppleTV2,1", + "apple_model|AppleTV1,1", + "apple_model|AppleTV3,1", + "apple_model|AppleTV3,2", + "apple_model|AppleTV5,3", + "apple_model|AppleTV6,2", + "apple_model|AppleTV11,1", + "apple_model|AppleTV14,1" + ] + }, + "vendor=Apple|class=watch": { + "Apple Watch": [ + "apple_model|Watch1,1", + "apple_model|Watch1,2", + "apple_model|Watch2,3", + "apple_model|Watch2,4", + "apple_model|Watch2,6", + "apple_model|Watch2,7", + "apple_model|Watch3,1", + "apple_model|Watch3,2", + "apple_model|Watch3,3", + "apple_model|Watch4,1", + "apple_model|Watch4,2", + "apple_model|Watch4,3", + "apple_model|Watch4,4", + "apple_model|Watch5,1", + "apple_model|Watch5,2", + "apple_model|Watch5,3", + "apple_model|Watch5,4", + "apple_model|Watch5,4", + "apple_model|Watch5,9", + "apple_model|Watch5,10", + "apple_model|Watch5,11", + "apple_model|Watch5,12", + "apple_model|Watch6,1", + "apple_model|Watch6,2", + "apple_model|Watch6,3", + "apple_model|Watch6,4", + "apple_model|Watch6,6", + "apple_model|Watch6,7", + "apple_model|Watch6,8", + "apple_model|Watch6,9", + "apple_model|Watch6,10", + "apple_model|Watch6,11", + "apple_model|Watch6,12", + "apple_model|Watch6,13", + "apple_model|Watch6,14", + "apple_model|Watch6,15", + "apple_model|Watch6,16", + "apple_model|Watch6,17" + ], + "Apple Watch Series 6": [ + "wifi4|0,1,33,36,48,45,221(0017f20a),221(0050f202),htcap:812c,htagg:1e,htmcs:000000ff,txpow:1000" + ], + "Apple Watch Ultra": [ + "apple_model|Watch6,18" + ] + } +} diff --git a/data/microsoft.json b/data/microsoft.json new file mode 100644 index 0000000..a52c9e5 --- /dev/null +++ b/data/microsoft.json @@ -0,0 +1,10 @@ +{ + "os_vendor=Microsoft|os=%": { + "Windows 10": [ + "dhcp_vendorid|MSFT 5.0" + ], + "XBOX": [ + "dhcp_vendorid|MSFT 5.0 XBOX" + ] + } +} diff --git a/data/nest.json b/data/nest.json new file mode 100644 index 0000000..0c542c2 --- /dev/null +++ b/data/nest.json @@ -0,0 +1,9 @@ +{ + "vendor=Google Nest": { + "Hub (Gen 2)": [ + "wifi4|0,1,33,36,48,45,127,191,221(00101802),221(0050f202),htcap:0163,htagg:17,htmcs:000000ff,vhtcap:0f805132,vhtrxmcs:0000fffe,vhttxmcs:0000fffe,txpow:e003,extcap:0000000000000040", + "dhcp_req|1,3,6", + "mac-oui-1c53f9|1" + ] + } +} diff --git a/data/nintendo.json b/data/nintendo.json new file mode 100644 index 0000000..2e99da5 --- /dev/null +++ b/data/nintendo.json @@ -0,0 +1,10 @@ +{ + "vendor=Nintendo|class=gaming": { + "Switch OLED": [ + "dhcp_req|1,3,6,28", + "wifi4|0,1,33,36,48,45,127,191,199,221(00101802),221(0050f202),htcap:002c,htagg:17,htmcs:0000ffff,vhtcap:03800022,vhtrxmcs:0000fffa,vhttxmcs:0000fffa,txpow:1103,extcap:0000000000000040", + "wifi4|0,1,33,36,48,45,127,221(00101802),221(0050f202),htcap:002c,htagg:17,htmcs:0000ffff,txpow:1303,extcap:0000000000000040", + "mac-oui-bcce25|1" + ] + } +} diff --git a/data/ring.json b/data/ring.json new file mode 100644 index 0000000..b1a2fad --- /dev/null +++ b/data/ring.json @@ -0,0 +1,9 @@ +{ + "vendor=Ring|class=doorbell": { + "Video Doorbell": [ + "wifi4|0,1,45,127,221(000c4306),221(0050f202),48,htcap:002c,htagg:01,htmcs:000000ff,extcap:00", + "dhcp_req|1,3,28,6", + "mac-oui-5c475e|1" + ] + } +} diff --git a/data/samsung.json b/data/samsung.json new file mode 100644 index 0000000..9ffc4d0 --- /dev/null +++ b/data/samsung.json @@ -0,0 +1,18 @@ +{ + "vendor=%": { + "Samsung": [ + "wifi-vendor-oui-0000f0|1" + ] + }, + "vendor=Samsung|class=phone": { + "Galaxy S21 Ultra 5G": [ + "wifi4|0,1,33,36,48,54,59,45,127,191,221(0000f022),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f917876,vhtrxmcs:0000fffa,vhttxmcs:2000fffa,txpow:12f9,extcap:04000000010000400020", + "wifi6|0,1,33,36,48,54,59,45,127,191,255(35),221(0000f022),221(00904c04),221(00101802),221(0050f202),htcap:006f,htagg:1b,htmcs:0000ffff,vhtcap:0f917876,vhtrxmcs:0000fffa,vhttxmcs:2000fffa,txpow:12f9,extcap:04000000010000400020,hemac:800000080803,hephy:000c0c089e017d800230" + ] + }, + "vendor=Samsung|class=tablet": { + "Galaxy Tab A": [ + "wifi4|0,1,36,33,45,48,54,127,191,199,221(0000f022),221(0050f202),htcap:016f,htagg:17,htmcs:000000ff,vhtcap:03007131,vhtrxmcs:0000fffe,vhttxmcs:0000fffe,txpow:1303,extcap:0400000000000040" + ] + } +} diff --git a/data/sony.json b/data/sony.json new file mode 100644 index 0000000..ca3901e --- /dev/null +++ b/data/sony.json @@ -0,0 +1,10 @@ +{ + "vendor=Sony|class=gaming": { + "Playstation 4": [ + "dhcp_vendorid|PS4" + ], + "Playstation 5": [ + "dhcp_vendorid|PS5" + ] + } +} diff --git a/files/etc/init.d/ufp b/files/etc/init.d/ufp new file mode 100755 index 0000000..4c03421 --- /dev/null +++ b/files/etc/init.d/ufp @@ -0,0 +1,14 @@ +#!/bin/sh /etc/rc.common +# Copyright (c) 2021 OpenWrt.org + +START=80 + +USE_PROCD=1 +PROG=/usr/sbin/ufpd + +start_service() { + procd_open_instance + procd_set_param command "$PROG" + procd_set_param respawn + procd_close_instance +} diff --git a/files/usr/sbin/ufpd b/files/usr/sbin/ufpd new file mode 100755 index 0000000..ff5f242 --- /dev/null +++ b/files/usr/sbin/ufpd @@ -0,0 +1,377 @@ +#!/usr/bin/env ucode +'use strict'; +import * as uloop from "uloop"; +import * as libubus from "ubus"; +import { readfile, glob, basename } from "fs"; +let uht = require("uht"); +push(REQUIRE_SEARCH_PATH, "/usr/share/ufp/*.uc"); + +uloop.init(); +let ubus = libubus.connect(); +let fingerprints = {}; +let fingerprint_ht; +let devices = {}; +let gc_timer; +let weight = { + "mac-oui": 3.0, +}; + +function get_weight(type) { + let w = weight[type]; + if (w) + return w; + type = split(type, "-"); + if (length(type) < 2) + return null; + pop(type); + type = join("-", type); + + return weight[type]; +} + + +function match_fingerprint(key) +{ + let fp, user_fp; + + if (fingerprint_ht) + fp = fingerprint_ht.get(null, key); + user_fp = fingerprints[key]; + + if (fp && user_fp) { + fp = slice(fp); + for (entry in user_fp) + push(fp, entry); + } + + return fp ?? user_fp; +} + +let global = { + uloop: uloop, + ubus: ubus, + weight: weight, + devices: devices, + fingerprints: fingerprints, + plugins: [], + + load_fingerprint_json: function(file) { + let data = json(readfile(file)); + fingerprints = data; + }, + + get_weight: get_weight, + + add_weight: function(data) { + for (let entry in data) + weight[entry] = data[entry]; + }, + + device_refresh: function(mac) { + mac = lc(mac); + let dev = devices[mac]; + if (!dev) + return; + + dev.timestamp = time(); + }, + + device_add_data: function(mac, line) { + mac = lc(mac); + let dev = devices[mac]; + if (!dev) { + dev = devices[mac] = { + data: {}, + meta: {}, + timestamp: time() + }; + let oui = "mac-oui-" + join("", slice(split(mac, ":"), 0, 3)); + dev.data[oui] = `${oui}|1`; + } + + if (substr(line, 0, 1) == "%") { + line = substr(line, 1); + let meta = split(line, "|", 3); + if (!meta[2]) + return; + + dev.meta[meta[0]] ??= {}; + if (!get_weight(meta[1])) + return; + + dev.meta[meta[0]][meta[1]] = meta[2]; + return; + } + + let fp = split(line, "|", 2); + if (!fp[1]) + return; + + dev.data[fp[0]] = line; + } +}; + +function load_plugins() +{ + let plugins = glob("/usr/share/ufp/plugin_*.uc"); + for (let name in plugins) { + name = substr(basename(name), 0, -3); + try { + let plugin = require(name); + plugin.init(global); + push(global.plugins, plugin); + } catch (e) { + warn(`Failed to load plugin ${name}: ${e}\n${e.stacktrace[0].context}\n`); + } + } +} + +function refresh_plugins() +{ + for (let plugin in global.plugins) { + if (!plugin.refresh) + continue; + + try { + plugin.refresh(); + } catch (e) { + warn(`Failed to refresh plugin: ${e}\n${e.stacktrace[0].context}\n`); + } + } +} + +function device_gc() +{ + gc_timer.set(60 * 60 * 1000); + let timeout = time() - 60 * 60 * 24; + + for (let mac in devices) { + if (devices[mac].timestamp < timeout) + delete devices[mac]; + } +} + +// returns: { "": { "": [ , [ ] ] } } +function __device_match_list(mac) +{ + let dev = devices[mac]; + if (!dev || !length(dev)) + return null; + + let ret = {}; + let data = dev.data; + let match_devs = []; + + for (let fp in data) { + let match = match_fingerprint(data[fp]); + if (!match) + continue; + + for (let match_cur in match) + push(match_devs, [ match_cur, global.get_weight(fp), fp ]); + } + + for (let meta in dev.meta) { + let meta_cur = dev.meta[meta]; + for (let type in meta_cur) { + let match = {}; + match[meta] = meta_cur[type]; + push(match_devs, [ match, global.get_weight(type), type ]); + } + } + + for (let i = 0; i < length(match_devs); i++) { + let match = match_devs[i]; + let match_data = match[0]; + let match_weight = match[1]; + let match_fp = [ match[2] ]; + let meta_entry = {}; + + for (let j = 0; j < length(match_devs); j++) { + if (j == i) + continue; + + let cur = match_devs[j]; + let cur_data = cur[0]; + for (let key in cur_data) { + if (lc(match_data[key]) == lc(cur_data[key])) { + match_weight += cur[1]; + push(match_fp, cur[2]); + break; + } + } + } + + for (let key in match_data) { + let val = match_data[key]; + ret[key] ??= {}; + let ret_key = ret[key]; + + ret_key[val] ??= [ 0.0, {} ]; + let ret_val = ret_key[val]; + + ret_val[0] += match_weight; + for (let fp in match_fp) + ret_val[1][fp]++; + } + } + + for (let key in ret) { + let ret_key = ret[key]; + for (let val in ret_key) { + let ret_val = ret_key[val]; + ret_val[1] = keys(ret_val[1]); + } + } + + return ret; +} + +function device_match_list(mac) +{ + let match = __device_match_list(mac); + + for (let meta in match) { + let match_meta = match[meta]; + let meta_list = keys(match_meta); + sort(meta_list, (a, b) => match_meta[b][0] - match_meta[a][0]); + match[meta] = map(meta_list, (key) => [ key, match_meta[key][0], match_meta[key][1] ]); + } + + return match; +} + +global.ubus_object = { + load_fingerprints: { + args: { + file: "", + }, + call: function(req) { + let file = req.args.file; + if (!file) + return libubus.STATUS_INVALID_ARGUMENT; + + try { + global.load_fingerprint_json(file); + } catch (e) { + warn(`Exception in ubus function: ${e}\n${e.stacktrace[0].context}, file=${file}\n`); + return libubus.STATUS_INVALID_ARGUMENT; + } + + return 0; + } + }, + + get_data: { + args: { + macaddr: "", + }, + call: function(req) { + let mac = req.args.macaddr; + + refresh_plugins(); + + if (!mac) + return devices; + + let dev = devices[mac]; + if (!dev) + return libubus.STATUS_NOT_FOUND; + + return dev; + } + }, + + add_data: { + args: { + macaddr: "", + data: [] + }, + call: function(req) { + let mac = req.args.macaddr; + let data = req.args.data; + if (!mac || !data) + return libubus.STATUS_INVALID_ARGUMENT; + + for (let line in data) + global.device_add_data(mac, line); + + return 0; + } + }, + + fingerprint: { + args: { + macaddr: "", + weight: false + }, + call: function(req) { + refresh_plugins(); + + let mac_list = req.args.macaddr ? [ req.args.macaddr ] : keys(devices); + let ret = {}; + + for (let mac in mac_list) { + let match_list = device_match_list(mac); + if (!match_list) + return libubus.STATUS_NOT_FOUND; + + let cur_ret = { }; + if (req.args.weight) + cur_ret.weight = {}; + ret[mac] = cur_ret; + + for (let meta in match_list) { + let match_meta = match_list[meta]; + + if (length(match_meta) < 1) + continue; + + match_meta = match_meta[0]; + + cur_ret[meta] = match_meta[0]; + if (req.args.weight) + cur_ret.weight[meta] = match_meta[1]; + } + } + + return req.args.macaddr ? ret[req.args.macaddr] : ret; + } + }, + + list: { + args: { + macaddr: "" + }, + call: function(req) { + refresh_plugins(); + + let mac_list = req.args.macaddr ? [ req.args.macaddr ] : keys(devices); + let ret = {}; + + for (let mac in mac_list) { + let match_list = device_match_list(mac); + if (!match_list) + return libubus.STATUS_NOT_FOUND; + + let cur_ret = {}; + ret[mac] = cur_ret; + + for (let meta in match_list) + cur_ret[meta] = match_list[meta]; + } + + return req.args.macaddr ? ret[req.args.macaddr] : ret; + } + }, +}; + +try { + fingerprint_ht = uht.open("/usr/share/ufp/devices.bin"); +} catch (e) { + warn(`Failed to load fingerprints: ${e}\n${e.stacktrace[0].context}\n`); +} +load_plugins(); +ubus.publish("fingerprint", global.ubus_object); +gc_timer = uloop.timer(1000, device_gc); +uloop.run(); diff --git a/files/usr/share/ufp/plugin_dhcp.uc b/files/usr/share/ufp/plugin_dhcp.uc new file mode 100644 index 0000000..d34a582 --- /dev/null +++ b/files/usr/share/ufp/plugin_dhcp.uc @@ -0,0 +1,120 @@ +let ubus, sub, listener, global; + +const dhcp_parser_proto = { + reset: function() { + this.offset = 0; + }, + + parseAt: function(offset) { + let id = hex(substr(this.buffer, offset, 2)); + let len = hex(substr(this.buffer, offset + 2, 2)) * 2; + if (type(id) != "int" || type(len) != "int") + return null; + + let data = substr(this.buffer, offset + 4, len); + if (length(data) != len) + return null; + + return [ id, data, len + 4 ]; + }, + + next: function() { + let data = this.parseAt(this.offset); + if (!data) + return null; + + this.offset += data[2]; + return data; + }, + + foreach: function(cb) { + let offset = 0; + let data; + + while ((data = this.parseAt(offset)) != null) { + offset += data[2]; + let ret = cb(data); + if (type(ret) == "boolean" && !ret) + break; + } + }, +}; + +function dhcp_opt_parser(data) { + let parser = { + offset: 0, + buffer: data, + }; + + proto(parser, dhcp_parser_proto); + + return parser; +} + +function parse_array(data) +{ + return map(match(data, /../g), (val) => val[0]); +} + +function parse_string(data) +{ + return join("", map(parse_array(data[1]), (val) => chr(hex(val)))); +} + +function parse_macaddr(addr) +{ + return join(":", parse_array(addr)); +} + +function dhcp_cb(msg) { + if (msg.type != "discover" && msg.type != "request") + return; + + let packet = msg.data.packet; + if (!packet) + return; + + let opts = substr(packet, 240 * 2); + if (length(opts) < 16) + return; + + let macaddr = parse_macaddr(substr(packet, 28 * 2, 12)); + + opts = dhcp_opt_parser(opts); + opts.foreach((data) => { + let id = data[0]; + switch (id) { + case 12: + typestr = "%device_name|dhcp_device_name"; + data = parse_string(data); + break; + case 55: + typestr = "dhcp_req"; + data = join(",", map(parse_array(data[1]), (val) => hex(val))); + break; + case 60: + typestr = "dhcp_vendorid"; + data = parse_string(data); + break; + default: + return; + } + global.device_add_data(macaddr, `${typestr}|${data}`); + }); +} + +function init(gl) { + global = gl; + ubus = gl.ubus; + + gl.weight.dhcp_req = 1.1; + gl.weight.dhcp_device_name = 5.0; + sub = ubus.subscriber(dhcp_cb); + listener = ubus.listener("ubus.object.add", (event, msg) => { + if (msg.path == "dhcpsnoop") + sub.subscribe(msg.path); + }); + sub.subscribe("dhcpsnoop"); +} + +return { init }; diff --git a/files/usr/share/ufp/plugin_mdns.uc b/files/usr/share/ufp/plugin_mdns.uc new file mode 100644 index 0000000..6a0f16f --- /dev/null +++ b/files/usr/share/ufp/plugin_mdns.uc @@ -0,0 +1,298 @@ +let ubus, global, last_refresh; +let rtnl = require("rtnl"); + +const homekit_types = { + "2": "Bridge", + "3": "Fan", + "4": "Garage Door Opener", + "5": "Lightbulb", + "6": "Door Lock", + "7": "Outlet", + "8": "Switch", + "9": "Thermostat", + "10": "Sensor", + "11": "Security System", + "12": "Door", + "13": "Window", + "14": "Window Covering", + "15": "Programmable Switch", + "17": "IP Camera", + "18": "Video Doorbell", + "19": "Air Purifier", + "20": "Heater", + "21": "Air Conditioner", + "22": "Humidifier", + "23": "Dehumidifier", + "28": "Sprinkler", + "29": "Faucet", + "30": "Shower System", + "31": "Television", + "32": "Remote Control", + "33": "WiFi Router", + "34": "Audio Receiver", + "35": "TV Set Top Box", + "36": "TV Stick", +}; + +function get_arp_cache() { + let ret = {}; + + let cache = rtnl.request(rtnl.const.RTM_GETNEIGH, rtnl.const.NLM_F_DUMP, {}); + for (let data in cache) { + if (!data.lladdr || !data.dst) + continue; + + ret[data.dst] = data.lladdr; + } + + return ret; +} + +function find_arp_entry(arp, addrs) +{ + for (let addr in addrs) { + let val = arp[addr]; + if (val) + return val; + } + return null; +} + +function get_host_addr_cache() +{ + let arp = get_arp_cache(); + let host_cache = ubus.call("umdns", "hosts", { array: true }); + let host_addr = {}; + for (let host in host_cache) { + let host_data = host_cache[host]; + host_addr[host] = find_arp_entry(arp, host_data.ipv4) ?? + find_arp_entry(arp, host_data.ipv6); + } + + return host_addr; +} + +function convert_txt_array(data) { + let txt = {}; + + for (let rec in data) { + rec = split(rec, "=", 2); + if (rec[1]) + txt[rec[0]] = rec[1]; + } + + return txt; +} + +function handle_apple(txt, name) +{ + let ret = []; + let model = txt.model ?? txt.rpMd ?? txt.am; + if (model) + push(ret, `apple_model|${model}`); + if (name) + push(ret, `%device_name|mdns_implicit_device_name|${name}`); + + return ret; +} + +function handle_homekit(txt) +{ + let ret = []; + if (txt.ci) { + let type = homekit_types[txt.ci]; + if (type) + push(ret, `%class|homekit_class|${type}`); + } + + if (txt.md) + push(ret, `%device|mdns_model_string|${txt.md}`); + + return ret; +} + +function handle_googlecast_model(txt) +{ + let ret = []; + let model = txt.model ?? txt.rpMd ?? txt.md; + if (model) + push(ret, `%device|mdns_model_string|${model}`); + if (txt.fn) + push(ret, `%device_name|mdns_device_name|${txt.fn}`); + if (txt.rs == 'TV') + push(ret, "%class|mdns_tv|TV"); + return ret; +} + +function handle_printer(txt) +{ + let ret = []; + let vendor = txt.usb_MFG; + let model = txt.usb_MDL ?? txt.ty ?? replace(txt.product, /^\((.*)\)$/, "$1"); + let weight = (txt.usb_MFG && txt.usb_MDL) ? "mdns_vendor_model_string" : "mdns_model_string"; + if (vendor) + push(ret, `%vendor|${weight}|${vendor}`); + if (model) + push(ret, `%device|${weight}|${model}`); + push(ret, "%class|mdns_printer|Printer"); + + return ret; +} + +function handle_scanner(txt) +{ + let ret = []; + let vendor = txt.mfg; + let model = txt.mdl ?? txt.ty; + let weight = (txt.mfg && txt.mdl) ? "mdns_vendor_model_string" : "mdns_model_string"; + if (vendor) + push(ret, `%vendor|${weight}|${vendor}`); + if (model) + push(ret, `%device|${weight}|${model}`); + push(ret, "%class|mdns_scanner|Scanner"); + + return ret; +} + +function handle_hue(txt, name) +{ + let ret = []; + push(ret, `%vendor|mdns_service|Philips`); + push(ret, `%device|mdns_service|Hue`); + if (name) + push(ret, `%device_name|mdns_implicit_device_name|${name}`); + + return ret; +} + +function handle_fritzbox(txt) +{ + let ret = []; + push(ret, `%vendor|mdns_service|AVM`); + push(ret, `%device|mdns_service|FRITZ!Box`); + + return ret; +} + +const service_handler = { + "_airplay._tcp": handle_apple, + "_companion-link._tcp": handle_apple, + "_raop._tcp": (txt) => handle_apple(txt), // skip name + "_googlecast._tcp": handle_googlecast_model, + "_ipp._tcp": handle_printer, + "_scanner._tcp": handle_scanner, + "_hap._tcp": handle_homekit, + "_hap._udp": handle_homekit, + "_hue._tcp": handle_hue, + "_fbox._tcp": handle_fritzbox, + "_avmnexus._tcp": handle_fritzbox, +}; + +function arp_resolve(list) +{ + let host_cache = ubus.call("umdns", "hosts", { array: true }); + for (let entry in list) { + let iface = entry[0]; + let host = entry[1]; + if (!host_cache[host]) + continue; + for (let addr in host_cache[host].ipv4) + rtnl.request(rtnl.const.RTM_NEWNEIGH, + rtnl.const.NLM_F_CREATE | rtnl.const.NLM_F_REPLACE, + { dev: iface, state: rtnl.const.NUD_INCOMPLETE, + flags: rtnl.const.NTF_USE, family: rtnl.const.AF_INET, + dst: addr }); + } +} + +function refresh() { + if (last_refresh == time()) + return; + + let host_cache = get_host_addr_cache(); + let query_list = []; + let arp_pending = []; + let mdns_data; + + for (let i = 0; i < 2; i++) { + mdns_data = ubus.call("umdns", "browse", { array: true, address: false }); + for (let service in mdns_data) { + if (!service_handler[service]) + continue; + let service_data = mdns_data[service]; + for (let host in service_data) { + let host_entry = service_data[host].host; + let iface = service_data[host].iface; + if (!iface) + continue; + if (!host_entry) + push(query_list, [ `${host}.${service}.local`, iface ]); + else if (!host_cache[host_entry]) + push(arp_pending, [ iface, host_entry ]); + } + } + + if (!length(arp_pending) && !length(query_list)) + break; + + if (length(arp_pending) > 0) + arp_resolve(arp_pending); + + if (length(query_list) > 0) { + for (let query in query_list) + ubus.call("umdns", "query", { question: query[0], interface: query[1] }); + } + + sleep(1000); + + host_cache = get_host_addr_cache(); + mdns_data = ubus.call("umdns", "browse", { array: true, address: false }); + } + + for (let service in mdns_data) { + if (!service_handler[service]) + continue; + + let service_data = mdns_data[service]; + for (let host in service_data) { + let txt = service_data[host].txt; + if (!txt) + continue; + + let host_entry = service_data[host].host; + if (!host_entry) + continue; + + let mac = host_cache[host_entry]; + if (!mac) + continue; + + txt = convert_txt_array(txt); + let match = service_handler[service](txt, host); + for (let info in match) + global.device_add_data(mac, info); + + global.device_refresh(mac); + } + } +} + +function init(gl) { + global = gl; + ubus = gl.ubus; + + global.add_weight({ + apple_model: 10.0, + mdns_device_name: 10.0, + mdns_implicit_device_name: 5.0, + mdns_vendor_model_string: 10.0, + mdns_service: 10.0, + mdns_tv: 5.0, + mdns_model_string: 5.0, + mdns_printer: 5.0, + mdns_scanner: 1.0, + homekit_class: 2.0, + }); +} + +return { init, refresh }; diff --git a/files/usr/share/ufp/plugin_wifi.uc b/files/usr/share/ufp/plugin_wifi.uc new file mode 100644 index 0000000..723cd07 --- /dev/null +++ b/files/usr/share/ufp/plugin_wifi.uc @@ -0,0 +1,320 @@ +import * as struct from "struct"; +let ubus, uloop, global, timer; +let ap_cache = {}; + +const ie_tags = { + PWR_CAPABILITY: 33, + HT_CAP: 45, + EXT_CAPAB: 127, + VHT_CAP: 191, + __EXT_START: 0x100, + HE_CAP: 0x100 | 35, + VENDOR_WPS: 0x0050f204, +}; + +const ie_parser_proto = { + reset: function() { + this.offset = 0; + }, + + parseAt: function(offset) { + let hdr = substr(this.buffer, offset, 2); + if (length(hdr) != 2) + return null; + + let data = this.hdr.unpack(hdr); + if (length(data != 2)) + return null; + + let len = data[1]; + offset += 2; + data[1] += 2; + + if (data[0] == 221 && len >= 4) { + hdr = substr(this.buffer, offset, 4); + if (length(hdr) != 4) + return null; + + let val = this.vendor_hdr.unpack(hdr); + if (length(val) != 1 || val[0] < 0x200) + return null; + + data[0] = val[0]; + len -= 4; + offset += 4; + } else if (data[0] == 255 && len >= 1) { + hdr = substr(this.buffer, offset, 2); + if (length(hdr) != 2) + return null; + data[0] = 0x100 + this.hdr.unpack(hdr)[0]; + len -= 1; + offset += 1; + } + + data[2] = data[1]; + data[1] = substr(this.buffer, offset, len); + if (length(data[1]) != len) + return null; + + return data; + }, + + next: function() { + let data = this.parseAt(this.offset); + if (!data) + return null; + + this.offset += data[2]; + return data; + }, + + foreach: function(cb) { + let offset = 0; + let data; + + while ((data = this.parseAt(offset)) != null) { + offset += data[2]; + let ret = cb(data); + if (type(ret) == "boolean" && !ret) + break; + } + }, +}; + +function ie_parser(data) { + let parser = { + offset: 0, + buffer: data, + hdr: struct.new("BB"), + vendor_hdr: struct.new(">I"), + }; + + proto(parser, ie_parser_proto); + + return parser; +} + +function format_fn(unpack_str, format) +{ + return (data) => { + data = struct.unpack(unpack_str, data); + if (data && data[0]) + data = data[0]; + else + data = 0; + return sprintf(format, data) + }; +} + +let unpack; +unpack = { + u8: format_fn("B", "%02x"), + le16: format_fn(" join("", map(split(data, ""), unpack.u8)), +}; +let fingerprint_order = [ + "htcap", "htagg", "htmcs", "vhtcap", "vhtrxmcs", "vhttxmcs", + "txpow", "extcap", "wps", "hemac", "hephy" +]; + +function format_wps_ie(data) { + let offset = 0; + let len = length(data); + let s = struct.new("= 0x200) + return sprintf("221(%08x)", id); + if (id >= 0x100) + return sprintf("255(%d)", id - 0x100); + return sprintf("%d", id); +} + +let vendor_ie_filter = [ + 0x0050f2, // Microsoft WNN + 0x506f9a, // WBA + 0x8cfdf0, // Qualcom + 0x001018, // Broadcom + 0x000c43, // Ralink +]; + +function ie_fingerprint(data, mode) { + let caps = { + tags: [], + vendor_list: {} + }; + let parser = ie_parser(data); + + parser.foreach(function(ie) { + let skip = false; + let val = ie[1]; + switch (ie[0]) { + case ie_tags.HT_CAP: + caps.htcap = unpack.le16(substr(val, 0, 2)); + caps.htagg = unpack.u8(substr(val, 2, 1)); + caps.htmcs = unpack.le32(substr(val, 3, 4)); + break; + case ie_tags.VHT_CAP: + caps.vhtcap = unpack.le32(substr(val, 0, 4)); + caps.vhtrxmcs = unpack.le32(substr(val, 4, 4)); + caps.vhttxmcs = unpack.le32(substr(val, 8, 4)); + break; + case ie_tags.EXT_CAPAB: + caps.extcap = unpack.bytes(val); + break; + case ie_tags.PWR_CAPABILITY: + caps.txpow = unpack.le16(val); + break; + case ie_tags.VENDOR_WPS: + caps.wps = format_wps_ie(val); + break; + case ie_tags.HE_CAP: + if (mode != "wifi6") { + skip = true; + break; + } + caps.hemac = + unpack.le16(substr(val, 4, 2)) + + unpack.le32(substr(val, 0, 4)); + caps.hephy = + unpack.le16(substr(val, 15, 2)) + + unpack.le32(substr(val, 11, 4)) + + unpack.le32(substr(val, 7, 4)); + break; + } + if (ie[0] > 0x200) { + let vendor = ie[0] >> 8; + if (!(vendor in vendor_ie_filter)) + caps.vendor_list[sprintf("%06x", vendor)] = 1; + } + if (!skip) + push(caps.tags, ie[0]); + return null; + }); + + switch (mode) { + case "wifi6": + if (mode == "wifi6" && !caps.hemac) + return null; + break; + case "wifi-vendor-oui": + return caps.vendor_list; + default: + break; + } + + let tags = map(caps.tags, ie_fingerprint_str); + return + join(",", tags) + "," + + join(",", map( + filter(fingerprint_order, (key) => !!caps[key]), + (key) => `${key}:${caps[key]}` + )); +} + +function fingerprint(mac, mode, ies) { + switch (mode) { + case "wifi4": + if (!ies.assoc_ie) + break; + + let assoc = ie_fingerprint(ies.assoc_ie, mode); + if (!assoc) + break; + + global.device_add_data(mac, `${mode}|${assoc}`); + break; + case "wifi-vendor-oui": + let list = ie_fingerprint(ies.assoc_ie, mode); + for (let oui in list) { + global.device_add_data(mac, `${mode}-${oui}|1`); + } + break; + case "wifi6": + default: + let val = ie_fingerprint(ies.assoc_ie, mode); + if (!val) + break; + + global.device_add_data(mac, `${mode}|${val}`); + break; + } +} + +const fingerprint_modes = [ "wifi4", "wifi6", "wifi-vendor-oui" ]; + +function client_refresh(ap, mac, prev_cache) +{ + let ies = ubus.call(ap, "get_sta_ies", { address: mac }); + if (type(ies) != "object" || !ies.assoc_ie) + return null; + + ies.assoc_ie = b64dec(ies.assoc_ie); + if (ies.probe_ie) + ies.probe_ie = b64dec(ies.probe_ie); + + for (let mode in fingerprint_modes) + fingerprint(mac, mode, ies); + + return ies; +} + +function refresh() +{ + let ap_objs = filter(ubus.list(), (name) => match(name, /^hostapd\./)); + let prev_cache = ap_cache; + ap_cache = {}; + + timer.set(30 * 1000); + for (let ap in ap_objs) { + try { + let cur_cache = {}; + let prev_ap_cache = prev_cache[ap] ?? {}; + + ap_cache[ap] = cur_cache; + + let clients = ubus.call(ap, "get_clients").clients; + for (let client in clients) { + let client_cache = prev_ap_cache[client]; + if (!client_cache || client_cache.assoc_ie || !client_cache.probe_ie) + client_cache = client_refresh(ap, client); + global.device_refresh(client); + } + } catch (e) { + } + } +} + +function init(gl) { + global = gl; + ubus = gl.ubus; + uloop = gl.uloop; + + global.add_weight({ + wifi4: 2.0, + wifi6: 3.0, + "wifi-vendor-oui": 2.0 + }); + + timer = uloop.timer(1000, refresh); +} + +return { init, refresh }; diff --git a/scripts/convert-devices.uc b/scripts/convert-devices.uc new file mode 100644 index 0000000..44b2192 --- /dev/null +++ b/scripts/convert-devices.uc @@ -0,0 +1,58 @@ +#!/usr/bin/env ucode +'use strict'; +import { readfile, basename } from "fs"; +let uht = require("uht"); + +let signatures = {}; + +function parse_category(str) { + let items = split(str, "|"); + let data = {}; + for (let item in items) { + item = split(item, "=", 2); + if (item[1] == "%") + data["%val"] = item[0]; + else + data[item[0]] = item[1]; + } + + return data; +} + +function get_device(meta, name) +{ + let dev = {}; + + dev[meta["%val"] ?? "device"] = name; + for (let type in meta) { + if (substr(type, 0, 1) == "%") + continue; + + dev[type] = meta[type]; + } + + return dev; +} + +let out = shift(ARGV); +if (!out) { + warn(`Syntax: ${basename(sourcepath())} [ ...]\n`); + exit(1); +} + +for (let file in ARGV) { + let data = json(readfile(file)); + for (let category_str in data) { + let category = parse_category(category_str); + let devices = data[category_str]; + for (let dev in devices) { + for (let sig in devices[dev]) { + signatures[sig] ??= []; + push(signatures[sig], get_device(category, dev)); + } + } + } +} + +uht.mark_hashtable(signatures); +uht.save(out, signatures); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..0f84541 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.13) + +PROJECT(ucode-uht C) +ADD_DEFINITIONS(-Os -ggdb -Wall -Werror --std=gnu99 -ffunction-sections -fwrapv -D_GNU_SOURCE) + +IF(CMAKE_C_COMPILER_VERSION VERSION_GREATER 6) + ADD_DEFINITIONS(-Wextra -Werror=implicit-function-declaration) + ADD_DEFINITIONS(-Wformat -Werror=format-security -Werror=format-nonliteral) +ENDIF() +ADD_DEFINITIONS(-Wmissing-declarations -Wno-error=unused-variable -Wno-unused-parameter) + +IF(APPLE) + SET(UCODE_MODULE_LINK_OPTIONS "LINKER:-undefined,dynamic_lookup") + ADD_DEFINITIONS(-DBIND_8_COMPAT) +ELSE() + SET(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "-Wl,--gc-sections") +ENDIF() + +IF(DEBUG) + ADD_DEFINITIONS(-DDEBUG -g3 -O0) +ELSE() + ADD_DEFINITIONS(-DNDEBUG) +ENDIF() + +FIND_LIBRARY(ubox NAMES ubox) +FIND_LIBRARY(ucode NAMES ucode) +FIND_PATH(uloop_include_dir NAMES libubox/uloop.h) +FIND_PATH(ucode_include_dir NAMES ucode/module.h) +INCLUDE_DIRECTORIES(${uloop_include_dir} ${ucode_include_dir}) + +ADD_LIBRARY(uht_lib MODULE ucode.c uht.c xxhash32.c) +SET_TARGET_PROPERTIES(uht_lib PROPERTIES OUTPUT_NAME uht PREFIX "") +TARGET_LINK_OPTIONS(uht_lib PRIVATE ${UCODE_MODULE_LINK_OPTIONS}) +TARGET_LINK_LIBRARIES(uht_lib ${ubox}) + +INSTALL(TARGETS uht_lib LIBRARY DESTINATION lib/ucode) diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..4f3003e --- /dev/null +++ b/src/README.md @@ -0,0 +1,81 @@ +# ucode library for creating and using read-only hashtable files + +This allows fast lookup on files with lots of entries. Read-only hash table +files support json compatible data with objects that can selectively be turned +into hash tables for fast lookup. +These files can be loaded via mmap, to make it easy to share them across +processes and not have to keep contents in RAM at all times. +Any values inside the file are de-duplicated at creation time and referenced +via offsets. + +## Create hashtable files + +```javascript +let obj = { + // example data + hashtbl1: { + "foo": "bar", + //... + }, + hashtbl2: { + "foo": "bar1", + //... + } +}; + +let uht = require("uht"); +uht.mark_hashtable(obj.hashtbl1); +uht.mark_hashtable(obj.hashtbl2); +uht.save("filename.bin", obj); +``` + +## Use hashtable files + +```javascript +// Load File +let uht = require("uht"); +let file = uht.open("filename.bin"); + +// get outer object (not marked as hashtable) +let data = file.get(); + +// look up value from inner object +let val = file.get(data.hashtbl1, "foo"); +warn(`value: ${val}\n`); // returns "value: bar" +``` + + +## API: + +### `uht.save(filename, value)` + +Saves a ucode value as a uht file. Supports any json compatible datatype +(though most frequently used with an object). +By default, no hashtables are embedded. Any object intended to be used as +hashtable for faster lookup needs to be marked with `uht.mark_hashtable(obj)` +Arbitrary nesting is supported. + +### `uht.mark_hashtable(obj)` + +Mark an object for hashtable. This adds a key as marker, by default `"##hash": true`. + +### `uht.set_hashtable_key(key)` + +Set the marker key used to mark objects as hashtables. Must be used before +writing or dumping hashtables. + +### `ht = uht.open(filename)` + +Open a uht binary file and return a resource. +Returns null on failure. + +### `ht.get(val, key, dump)` + +When used without arguments, it returns the outermost value of the uht file +(typically an object). If dump is true, embedded hashtables are returned +as ucode objects. This is useful for dumping or rewriting into a new file after +modifications. +val can be used to reference embedded hashtables (obtained by previous `get()` +calls with `dump=false`). +If `key` is given, it performs a hashtable lookup and returns the result. +`val` may only be `null`, if the outer object is itself a hash table. diff --git a/src/ucode.c b/src/ucode.c new file mode 100644 index 0000000..5b06a54 --- /dev/null +++ b/src/ucode.c @@ -0,0 +1,240 @@ +#include +#include "uht.h" + +static uc_resource_type_t *reader_type, *hashtbl_type; +static char *hash_key; + +static uint32_t +writer_store_data(struct uht_writer *wr, uc_value_t *val) +{ + uint32_t *data, ret; + size_t i, len; + + switch (ucv_type(val)) { + case UC_STRING: + return uht_writer_add_string(wr, ucv_string_get(val)); + case UC_INTEGER: + return uht_writer_add_int(wr, ucv_int64_get(val)); + case UC_DOUBLE: + return uht_writer_add_double(wr, ucv_double_get(val)); + case UC_BOOLEAN: + return uht_writer_add_bool(wr, ucv_boolean_get(val)); + case UC_ARRAY: + len = ucv_array_length(val); + data = calloc(len, sizeof(*data)); + for (i = 0; i < len; i++) + data[i] = writer_store_data(wr, ucv_array_get(val, i)); + ret = uht_writer_add_array(wr, data, len); + free(data); + return ret; + case UC_OBJECT: + len = ucv_object_length(val); + if (ucv_is_truish(ucv_object_get(val, hash_key, NULL))) { + ret = uht_writer_hashtbl_alloc(wr, len - 1); + ucv_object_foreach(val, key, value) { + if (!strcmp(key, hash_key)) + continue; + uht_writer_hashtbl_add_element(wr, ret, key, + writer_store_data(wr, value)); + } + uht_writer_hashtbl_done(wr, ret); + return ret; + } + data = calloc(2 * len, sizeof(*data)); + i = 0; + ucv_object_foreach(val, key, value) { + data[i] = uht_writer_add_string(wr, key); + data[len + i] = writer_store_data(wr, value); + i++; + } + ret = uht_writer_add_object(wr, data, data + len, len); + free(data); + return ret; + default: + return 0; + } +} + +static uc_value_t * +writer_save(uc_vm_t *vm, size_t nargs) +{ + struct uht_writer wr = {}; + uc_value_t *file = uc_fn_arg(0); + uc_value_t *data = uc_fn_arg(1); + uint32_t val; + int ret = -1; + FILE *f; + + if (ucv_type(file) != UC_STRING || !data) + return NULL; + + f = fopen(ucv_string_get(file), "w"); + if (!f) + return NULL; + + uht_writer_init(&wr); + val = writer_store_data(&wr, data); + if (val) + ret = uht_writer_save(&wr, f, val); + fflush(f); + fclose(f); + uht_writer_free(&wr); + + return ucv_boolean_new(ret == 0); +} + +static uc_value_t * +__reader_get_value(uc_vm_t *vm, struct uht_reader *r, uint32_t attr, bool dump) +{ + enum uht_type type = uht_entry_type(attr); + uc_value_t *val; + size_t i; + + switch (type) { + case UHT_NULL: + return NULL; + case UHT_STRING: + return ucv_string_new(uht_reader_get_string(r, attr)); + case UHT_INT: + return ucv_int64_new(uht_reader_get_int(r, attr)); + case UHT_DOUBLE: + return ucv_double_new(uht_reader_get_double(r, attr)); + case UHT_BOOL: + return ucv_boolean_new(uht_reader_get_bool(r, attr)); + case UHT_HASHTBL: + if (!dump) + return ucv_resource_new(hashtbl_type, (void *)(uintptr_t)attr); + /* fallthrough */ + case UHT_OBJECT: + val = ucv_object_new(vm); + if (type == UHT_HASHTBL) + ucv_object_add(val, hash_key, ucv_boolean_new(true)); + uht_for_each(r, iter, attr) + ucv_object_add(val, iter.key, ucv_get(__reader_get_value(vm, r, iter.val, dump))); + return val; + case UHT_ARRAY: + val = ucv_array_new(vm); + i = 0; + uht_for_each(r, iter, attr) + ucv_array_set(val, i++, ucv_get(__reader_get_value(vm, r, iter.val, dump))); + return val; + } + + return NULL; +} + +static uc_value_t * +reader_open(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *file = uc_fn_arg(0); + struct uht_reader *r; + + if (ucv_type(file) != UC_STRING) + return NULL; + + r = calloc(1, sizeof(*r)); + if (uht_reader_open(r, ucv_string_get(file))) { + free(r); + return NULL; + } + + return ucv_resource_new(reader_type, r); +} + +static void +reader_free(void *ptr) +{ + struct uht_reader *r = ptr; + + uht_reader_close(r); + free(r); +} + +static uc_value_t * +set_hashtbl_key(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *key = uc_fn_arg(0); + + if (ucv_type(key) != UC_STRING) + return NULL; + + free(hash_key); + hash_key = strdup(ucv_string_get(key)); + + return ucv_boolean_new(true); +} + +static uc_value_t * +mark_hashtbl(uc_vm_t *vm, size_t nargs) +{ + uc_value_t *obj = uc_fn_arg(0); + + if (ucv_type(obj) != UC_OBJECT) + return NULL; + + ucv_object_add(obj, hash_key, ucv_boolean_new(true)); + return ucv_boolean_new(true); +} + +static uint32_t +reader_get_htable(uc_value_t *tbl) +{ + void *htbl; + + if (ucv_type(tbl) != UC_RESOURCE) + return 0; + + htbl = ucv_resource_data(tbl, "uht.hashtbl"); + if (!htbl) + return 0; + + return (uint32_t)(uintptr_t)htbl; +} + +static uc_value_t * +reader_get(uc_vm_t *vm, size_t nargs) +{ + struct uht_reader *r = uc_fn_thisval("uht.reader"); + uc_value_t *tbl = uc_fn_arg(0); + uc_value_t *key = uc_fn_arg(1); + uc_value_t *dump = uc_fn_arg(2); + uint32_t val; + + if (!r) + return NULL; + + if (tbl) + val = reader_get_htable(tbl); + else + val = r->val; + + if (key) { + if (uht_entry_type(val) != UHT_HASHTBL) + return 0; + + val = uht_reader_hashtbl_lookup(r, val, ucv_string_get(key)); + } + + return __reader_get_value(vm, r, val, ucv_is_truish(dump)); +} + +static const uc_function_list_t no_fns[] = {}; + +static const uc_function_list_t reader_fns[] = { + { "get", reader_get }, +}; + +static const uc_function_list_t hashtbl_fns[] = { + { "save", writer_save }, + { "open", reader_open }, + { "mark_hashtable", mark_hashtbl }, + { "set_hashtable_key", set_hashtbl_key }, +}; + +void uc_module_init(uc_vm_t *vm, uc_value_t *scope) +{ + hash_key = strdup("##hash_table"); + reader_type = uc_type_declare(vm, "uht.reader", reader_fns, reader_free); + hashtbl_type = uc_type_declare(vm, "uht.hashtbl", no_fns, NULL); + uc_function_list_register(scope, hashtbl_fns); +} diff --git a/src/uht.c b/src/uht.c new file mode 100644 index 0000000..cb01edf --- /dev/null +++ b/src/uht.c @@ -0,0 +1,442 @@ +#include +#include +#include +#include +#include + +#include + +#include "xxhash32.h" +#include "uht.h" + +#define ALIGN_OFS(ofs) ((ofs + UHT_ALIGN_MASK) & ~UHT_ALIGN_MASK) + +#define UHT_HASHTBL_SIZE_SHIFT 5 +#define UHT_HASHTBL_ORDER_MASK ((1 << UHT_HASHTBL_SIZE_SHIFT) - 1) + +#define UHT_HASHTBL_KEY_FLAG_FIRST 1 + +struct uht_file_hdr { + uint8_t version; + uint8_t _pad[3]; + uint32_t val; +}; + +struct uht_key { + uint32_t entry; + uint32_t len; +}; + +struct uht_entry { + struct avl_node node; + struct uht_key key; +}; + +struct uht_hashtbl_meta { + uint32_t *ht; + uint32_t *ht_slot; + uint32_t *ht_entry; + uint32_t elements; + uint8_t order; +}; + +static int +uht_key_comp(const void *k1, const void *k2, void *ptr) +{ + const struct uht_key *key1 = k1, *key2 = k2; + struct uht_writer *wr = ptr; + + if (key1->len != key2->len) + return key1->len - key2->len; + + return memcmp(uht_entry_ptr(wr->buf, key1->entry), + uht_entry_ptr(wr->buf, key2->entry), key1->len); +} + +static uint32_t +uht_writer_check_insert(struct uht_writer *wr, struct uht_key *key) +{ + struct uht_entry *entry; + + entry = avl_find_element(&wr->data, key, entry, node); + if (entry) + return entry->key.entry; + + entry = calloc(1, sizeof(*entry)); + entry->node.key = &entry->key; + entry->key = *key; + avl_insert(&wr->data, &entry->node); + wr->buf_ofs += key->len; + + return key->entry; +} + +void uht_writer_init(struct uht_writer *wr) +{ + avl_init(&wr->data, uht_key_comp, false, wr); + wr->buf_len = 256; + wr->buf = calloc(1, wr->buf_len); + wr->buf_ofs = sizeof(struct uht_file_hdr); +} + +static void * +__uht_writer_alloc(struct uht_writer *wr, struct uht_key *key, size_t size) +{ + void *ret; + + if (size >= (1 << 24) || wr->buf_ofs + size >= (1 << 30)) + return NULL; + + size = ALIGN_OFS(size); + while (wr->buf_ofs + size > wr->buf_len) { + wr->buf_len <<= 1; + wr->buf = realloc(wr->buf, wr->buf_len); + } + + key->len = size; + key->entry = wr->buf_ofs << (UHT_TYPE_BITS - UHT_ALIGN_BITS); + ret = wr->buf + wr->buf_ofs; + + return ret; +} + +static void * +uht_writer_alloc(struct uht_writer *wr, struct uht_key *key, size_t size) +{ + void *ret = __uht_writer_alloc(wr, key, size); + wr->buf_ofs += size; + return ret; +} + +static uint32_t +uht_writer_add_generic(struct uht_writer *wr, const void *data, size_t len) +{ + struct uht_key key; + + memcpy(__uht_writer_alloc(wr, &key, len), data, len); + return uht_writer_check_insert(wr, &key); +} + +static int uht_hashtbl_get_meta(struct uht_hashtbl_meta *meta, void *buf, size_t len, + uint32_t attr) +{ + uint32_t val; + + meta->ht = buf + uht_entry_offset(attr); + val = cpu_to_le32(*meta->ht); + meta->order = val & UHT_HASHTBL_ORDER_MASK; + meta->elements = val >> UHT_HASHTBL_SIZE_SHIFT; + meta->ht_slot = meta->ht + 1; + meta->ht_entry = meta->ht_slot + (1 << meta->order); + if ((void *)&meta->ht_entry[2 * meta->elements] > buf + len) + return -1; + + return 0; +} + +uint32_t uht_writer_hashtbl_alloc(struct uht_writer *wr, size_t n_members) +{ + struct uht_key key; + uint32_t *ht, ht_size; + uint8_t order = 2; + + if (n_members >= 1 << 24) + return 0; + + while (n_members > 1U << order) + order++; + + ht_size = 4 + (4 << order) + 8 * n_members; + ht = uht_writer_alloc(wr, &key, ht_size); + if (!ht) + return 0; + + memset(ht, 0, ht_size); + *ht = order; + + return key.entry | UHT_HASHTBL; +} + + +void uht_writer_hashtbl_add_element(struct uht_writer *wr, uint32_t hashtbl, + const char *key, uint32_t val) +{ + struct uht_hashtbl_meta meta = {}; + uint32_t key_attr = uht_writer_add_string(wr, key); + uint32_t *ht_next; + + if (uht_hashtbl_get_meta(&meta, wr->buf, wr->buf_ofs, hashtbl)) + return; + + ht_next = &meta.ht_entry[2 * meta.elements]; + *meta.ht = cpu_to_le32(le32_to_cpu(*meta.ht) + (1 << UHT_HASHTBL_SIZE_SHIFT)); + ht_next[0] = cpu_to_le32(key_attr); + ht_next[1] = cpu_to_le32(val); +} + +static uint32_t +uht_hashtbl_key_slot(const char *key, uint8_t order) +{ + uint32_t mask = (1 << order) - 1; + + return XXH32(key, strlen(key), 0) & mask; +} + + +static uint32_t +uht_hashtbl_entry_key_slot(struct uht_writer *wr, uint32_t k, uint8_t order) +{ + return uht_hashtbl_key_slot(uht_entry_ptr(wr->buf, k), order); +} + +struct uht_writer *__sort_wr; +static uint8_t __sort_order; +static int __entry_sort_fn(const void *a1, const void *a2) +{ + struct uht_writer *wr = __sort_wr; + const uint32_t *k1 = a1, *k2 = a2; + uint32_t slot1 = uht_hashtbl_entry_key_slot(wr, le32_to_cpu(*k1), __sort_order); + uint32_t slot2 = uht_hashtbl_entry_key_slot(wr, le32_to_cpu(*k2), __sort_order); + + return slot1 - slot2; +} + +void uht_writer_hashtbl_done(struct uht_writer *wr, uint32_t hashtbl) +{ + struct uht_hashtbl_meta meta = {}; + uint32_t last_slot = ~0; + + if (uht_hashtbl_get_meta(&meta, wr->buf, wr->buf_ofs, hashtbl)) + return; + + __sort_wr = wr; + __sort_order = meta.order; + qsort(meta.ht_entry, meta.elements, 8, __entry_sort_fn); + for (size_t i = 0; i < meta.elements; i++) { + uint32_t key_attr = cpu_to_le32(meta.ht_entry[2 * i]); + uint32_t slot = uht_hashtbl_entry_key_slot(wr, key_attr, __sort_order); + meta.ht_entry[2 * i] &= ~cpu_to_le32(UHT_TYPE_MASK); + if (slot != last_slot) + meta.ht_entry[2 * i] |= cpu_to_le32(UHT_HASHTBL_KEY_FLAG_FIRST); + meta.ht_slot[slot] = cpu_to_le32(i); + last_slot = slot; + } +} + +uint32_t uht_writer_add_array(struct uht_writer *wr, uint32_t *values, size_t n) +{ + struct uht_key key; + uint32_t *data; + + data = __uht_writer_alloc(wr, &key, 4 + n * 4); + *(data++) = cpu_to_le32(n); + for (size_t i = 0; i < n; i++) + *(data++) = cpu_to_le32(values[i]); + + return uht_writer_check_insert(wr, &key) | UHT_ARRAY; +} + +uint32_t uht_writer_add_object(struct uht_writer *wr, uint32_t *keys, + uint32_t *values, size_t n) +{ + struct uht_key key; + uint32_t *data; + + data = __uht_writer_alloc(wr, &key, 4 + n * 8); + *(data++) = cpu_to_le32(n); + for (size_t i = 0; i < n; i++) { + *(data++) = cpu_to_le32(keys[i]); + *(data++) = cpu_to_le32(values[i]); + } + return uht_writer_check_insert(wr, &key) | UHT_OBJECT; +} + +uint32_t uht_writer_add_string(struct uht_writer *wr, const char *val) +{ + return uht_writer_add_generic(wr, val, strlen(val) + 1) | UHT_STRING; +} + +uint32_t uht_writer_add_double(struct uht_writer *wr, double val) +{ + union { + double d; + uint64_t u64; + } v = { + .d = val + }; + v.u64 = cpu_to_le64(v.u64); + return uht_writer_add_generic(wr, &v.u64, 8) | UHT_DOUBLE; +} + +uint32_t uht_writer_add_int(struct uht_writer *wr, int64_t val) +{ + val = cpu_to_le64(val); + return uht_writer_add_generic(wr, &val, 8) | UHT_INT; +} + +int uht_writer_save(struct uht_writer *wr, FILE *out, uint32_t val) +{ + struct uht_file_hdr *hdr = wr->buf; + + hdr->val = val; + + if (fwrite(wr->buf, 1, wr->buf_ofs, out) != wr->buf_ofs) + return -1; + + return 0; +} + +void uht_writer_free(struct uht_writer *wr) +{ + struct uht_entry *e, *tmp; + + avl_remove_all_elements(&wr->data, e, node, tmp) + free(e); + free(wr->buf); + memset(wr, 0, sizeof(*wr)); +} + +static inline uint32_t +__uht_iter_fetch(struct uht_reader_iter *iter) +{ + return le32_to_cpu(*(iter->__data++)); +} + +struct uht_reader_iter +__uht_object_iter_init(struct uht_reader *r, uint32_t attr) +{ + struct uht_reader_iter iter = { + .type = uht_entry_type(attr), + }; + + switch (iter.type) { + case UHT_HASHTBL: + iter.__data = uht_entry_ptr(r->data, attr); + iter.size = __uht_iter_fetch(&iter); + iter.__data += 1 << (iter.size & UHT_HASHTBL_ORDER_MASK); + iter.size >>= UHT_HASHTBL_SIZE_SHIFT; + break; + case UHT_ARRAY: + case UHT_OBJECT: + iter.__data = uht_entry_ptr(r->data, attr); + iter.size = __uht_iter_fetch(&iter); + break; + default: + break; + } + + if (iter.index < iter.size) + __uht_object_iter_next(r, &iter); + + return iter; +} + +void __uht_object_iter_next(struct uht_reader *r, struct uht_reader_iter *iter) +{ + if (iter->type != UHT_ARRAY) { + uint32_t key = __uht_iter_fetch(iter); + key &= ~UHT_TYPE_MASK; + key |= UHT_STRING; + iter->key = uht_reader_get_string(r, key); + } + iter->val = __uht_iter_fetch(iter); +} + +uint32_t uht_reader_hashtbl_lookup(struct uht_reader *r, uint32_t hashtbl, + const char *key) +{ + uint32_t *ht, *ht_end, val, slot, size, offset; + size_t key_len = strlen(key); + int32_t entry; + uint8_t order; + + if (!uht_entry_valid(r->len, hashtbl)) + return 0; + + ht = uht_entry_ptr(r->data, hashtbl); + val = le32_to_cpu(*ht); + order = val & UHT_HASHTBL_ORDER_MASK; + size = val >> UHT_HASHTBL_SIZE_SHIFT; + offset = (1 << order) + 2 * size; + ht_end = ht + offset; + offset <<= 2 + UHT_TYPE_BITS - UHT_ALIGN_BITS; + + if (!uht_entry_valid(r->len, hashtbl + offset)) + return 0; + + ht++; + slot = uht_hashtbl_key_slot(key, order); + if (ht + slot >= ht_end) + return 0; + + entry = le32_to_cpu(ht[slot]) * 2; + if ((uint32_t)entry > 2 * size) + return 0; + + ht += 1 << order; + while (entry >= 0) { + uint32_t cur_entry = le32_to_cpu(ht[entry]); + const char *cur_key; + size_t off; + + cur_entry &= ~UHT_TYPE_MASK; + cur_entry |= UHT_STRING; + if (!uht_entry_valid(r->len, cur_entry)) + return 0; + + cur_key = uht_reader_get_string(r, cur_entry); + off = cur_key - (const char *)r->data; + if (off + key_len >= r->len) + return 0; + + if (!strncmp(key, cur_key, key_len + 1)) { + cur_entry = le32_to_cpu(ht[entry + 1]); + if (!uht_entry_valid(r->len, cur_entry)) + return 0; + + return cur_entry; + } + + if (ht[entry] & cpu_to_le32(UHT_HASHTBL_KEY_FLAG_FIRST)) + return 0; + + entry -= 2; + } + + return 0; +} + +int uht_reader_open(struct uht_reader *r, const char *file) +{ + const struct uht_file_hdr *hdr; + struct stat st; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + return -1; + + if (fstat(fd, &st) < 0) + goto close_fd; + + if (st.st_size < (off_t)sizeof(struct uht_file_hdr)) + goto close_fd; + + r->fd = fd; + r->data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + r->len = st.st_size; + hdr = r->data; + r->val = hdr->val; + + return 0; + +close_fd: + close(fd); + return -1; +} + +void uht_reader_close(struct uht_reader *r) +{ + munmap(r->data, r->len); + close(r->fd); +} diff --git a/src/uht.h b/src/uht.h new file mode 100644 index 0000000..73b75a9 --- /dev/null +++ b/src/uht.h @@ -0,0 +1,139 @@ +#ifndef __UHT_H +#define __UHT_H + +#include +#include +#include +#include +#include + +#define UHT_TYPE_MASK 0xf +#define UHT_TYPE_BITS 4 +#define UHT_ALIGN_BITS 2 + +#define UHT_ALIGN_MASK ((1 << UHT_ALIGN_BITS) - 1) + +enum uht_type { + UHT_NULL, /* can also be used as value */ + UHT_STRING, + UHT_INT, + UHT_DOUBLE, + UHT_BOOL, + UHT_HASHTBL, + UHT_OBJECT, + UHT_ARRAY, +}; + +struct uht_writer { + struct avl_tree data; + void *buf; + size_t buf_ofs; + size_t buf_len; +}; + +uint32_t uht_writer_hashtbl_alloc(struct uht_writer *wr, size_t n_members); +void uht_writer_hashtbl_add_element(struct uht_writer *wr, uint32_t hashtbl, + const char *key, uint32_t val); +void uht_writer_hashtbl_done(struct uht_writer *wr, uint32_t hashtbl); + +uint32_t uht_writer_add_array(struct uht_writer *wr, uint32_t *members, size_t n); +uint32_t uht_writer_add_object(struct uht_writer *wr, uint32_t *keys, + uint32_t *values, size_t n); +uint32_t uht_writer_add_string(struct uht_writer *wr, const char *val); +uint32_t uht_writer_add_double(struct uht_writer *wr, double val); + +static inline uint32_t +uht_writer_add_bool(struct uht_writer *wr, bool val) +{ + return (((uint32_t)val) << UHT_TYPE_BITS) | UHT_BOOL; +} + +uint32_t uht_writer_add_int(struct uht_writer *wr, int64_t val); + +void uht_writer_init(struct uht_writer *wr); +int uht_writer_save(struct uht_writer *wr, FILE *out, uint32_t val); +void uht_writer_free(struct uht_writer *wr); + +static inline uint32_t +uht_entry_offset(uint32_t entry) +{ + return ((entry & ~UHT_TYPE_MASK) >> (UHT_TYPE_BITS - UHT_ALIGN_BITS)); +} + +static inline void * +uht_entry_ptr(void *buf, uint32_t entry) +{ + return buf + uht_entry_offset(entry); +} + +static inline enum uht_type +uht_entry_type(uint32_t attr) +{ + return attr & UHT_TYPE_MASK; +} + +static inline bool +uht_entry_valid(size_t len, uint32_t attr) +{ + return uht_entry_offset(attr) + 4 <= len; +} + +struct uht_reader { + void *data; + size_t len; + uint32_t val; + int fd; +}; + +struct uht_reader_iter { + uint32_t *__data; + uint8_t type; + + uint32_t index, size; + const char *key; + uint32_t val; +}; + +int uht_reader_open(struct uht_reader *r, const char *file); +void uht_reader_close(struct uht_reader *r); + +static inline const char * +uht_reader_get_string(struct uht_reader *r, uint32_t attr) +{ + return uht_entry_ptr(r->data, attr); +} + +static inline int64_t +uht_reader_get_int(struct uht_reader *r, uint32_t attr) +{ + return le64_to_cpu(*(int64_t *)uht_entry_ptr(r->data, attr)); +} + +static inline bool +uht_reader_get_bool(struct uht_reader *r, uint32_t attr) +{ + return !!(attr >> UHT_TYPE_BITS); +} + +static inline double +uht_reader_get_double(struct uht_reader *r, uint32_t attr) +{ + union { + double d; + uint64_t u64; + } v = { + .u64 = le64_to_cpu(*(uint64_t *)uht_entry_ptr(r->data, attr)) + }; + return v.d; +} + +void __uht_object_iter_next(struct uht_reader *r, struct uht_reader_iter *iter); +struct uht_reader_iter __uht_object_iter_init(struct uht_reader *r, uint32_t attr); +uint32_t uht_reader_hashtbl_lookup(struct uht_reader *r, uint32_t hashtbl, + const char *key); + +#define uht_for_each(r, iter, attr) \ + for (struct uht_reader_iter iter = __uht_object_iter_init(r, attr); \ + iter.index < iter.size; __uht_object_iter_next(r, &iter), iter.index++) + +#endif diff --git a/src/xxhash32.c b/src/xxhash32.c new file mode 100644 index 0000000..da005d5 --- /dev/null +++ b/src/xxhash32.c @@ -0,0 +1,146 @@ +/* + * xxHash - Fast Hash algorithm + * Copyright (C) 2012-2020 Yann Collet + * Copyright (C) 2019-2020 Devin Hussey (easyaspi314) + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash */ + +/* This is a compact, 100% standalone reference XXH32 single-run implementation. + * Instead of focusing on performance hacks, this focuses on cleanliness, + * conformance, portability and simplicity. + * + * This file aims to be 100% compatible with C90/C++98, with the additional + * requirement of stdint.h. No library functions are used. */ + +#include /* size_t, NULL */ +#include /* uint8_t, uint32_t */ +#include "xxhash32.h" + +static uint32_t const PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ +static uint32_t const PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ +static uint32_t const PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ +static uint32_t const PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ +static uint32_t const PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ + +/* Rotates value left by amt. */ +static uint32_t XXH_rotl32(uint32_t const value, uint32_t const amt) +{ + return (value << (amt % 32)) | (value >> (32 - (amt % 32))); +} + +/* Portably reads a 32-bit little endian integer from data at the given offset. */ +static uint32_t XXH_read32(uint8_t const *const data, size_t const offset) +{ + return (uint32_t) data[offset + 0] + | ((uint32_t) data[offset + 1] << 8) + | ((uint32_t) data[offset + 2] << 16) + | ((uint32_t) data[offset + 3] << 24); +} + +/* Mixes input into acc. */ +static uint32_t XXH32_round(uint32_t acc, uint32_t const input) +{ + acc += input * PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= PRIME32_1; + return acc; +} + +/* Mixes all bits to finalize the hash. */ +static uint32_t XXH32_avalanche(uint32_t hash) +{ + hash ^= hash >> 15; + hash *= PRIME32_2; + hash ^= hash >> 13; + hash *= PRIME32_3; + hash ^= hash >> 16; + return hash; +} + +/* The XXH32 hash function. + * input: The data to hash. + * length: The length of input. It is undefined behavior to have length larger than the + * capacity of input. + * seed: A 32-bit value to seed the hash with. + * returns: The 32-bit calculated hash value. */ +uint32_t XXH32(void const *const input, size_t const length, uint32_t const seed) +{ + uint8_t const *const data = (uint8_t const *) input; + uint32_t hash; + size_t remaining = length; + size_t offset = 0; + + /* Don't dereference a null pointer. The reference implementation notably doesn't + * check for this by default. */ + if (input == NULL) { + return XXH32_avalanche(seed + PRIME32_5); + } + + if (remaining >= 16) { + /* Initialize our accumulators */ + uint32_t acc1 = seed + PRIME32_1 + PRIME32_2; + uint32_t acc2 = seed + PRIME32_2; + uint32_t acc3 = seed + 0; + uint32_t acc4 = seed - PRIME32_1; + + while (remaining >= 16) { + acc1 = XXH32_round(acc1, XXH_read32(data, offset)); offset += 4; + acc2 = XXH32_round(acc2, XXH_read32(data, offset)); offset += 4; + acc3 = XXH32_round(acc3, XXH_read32(data, offset)); offset += 4; + acc4 = XXH32_round(acc4, XXH_read32(data, offset)); offset += 4; + remaining -= 16; + } + + hash = XXH_rotl32(acc1, 1) + XXH_rotl32(acc2, 7) + XXH_rotl32(acc3, 12) + XXH_rotl32(acc4, 18); + } else { + /* Not enough data for the main loop, put something in there instead. */ + hash = seed + PRIME32_5; + } + + hash += (uint32_t) length; + + /* Process the remaining data. */ + while (remaining >= 4) { + hash += XXH_read32(data, offset) * PRIME32_3; + hash = XXH_rotl32(hash, 17); + hash *= PRIME32_4; + offset += 4; + remaining -= 4; + } + + while (remaining != 0) { + hash += (uint32_t) data[offset] * PRIME32_5; + hash = XXH_rotl32(hash, 11); + hash *= PRIME32_1; + --remaining; + ++offset; + } + return XXH32_avalanche(hash); +} diff --git a/src/xxhash32.h b/src/xxhash32.h new file mode 100644 index 0000000..3ce4ba5 --- /dev/null +++ b/src/xxhash32.h @@ -0,0 +1,8 @@ +#ifndef __XXH32_H +#define __XXH32_H + +#include + +uint32_t XXH32(void const *const input, size_t const length, uint32_t const seed); + +#endif -- 2.30.2