From 5c61bc69f7508afaf030f7f4a8ce11a4b4a43eaf Mon Sep 17 00:00:00 2001
From: Enrico Zini
Date: Tue, 17 Oct 2023 16:54:23 +0200
Subject: [PATCH] Python bindings for Scanner::scan_data. refs: #317

---
 python/scan.cc                 | 42 +++++++++++++++++++++++++++++++---
 python/tests/test_scan_grib.py | 14 ++++++++++++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/python/scan.cc b/python/scan.cc
index 533662bb..f7941104 100644
--- a/python/scan.cc
+++ b/python/scan.cc
@@ -667,6 +667,43 @@ struct get_scanner : public ClassMethKwargs
     }
 };
 
+/// Scanner.scan_data(data: bytes): scan a memory buffer passed from Python.
+struct scan_data : public MethKwargs
+{
+    constexpr static const char* name = "scan_data";
+    constexpr static const char* signature = "data: bytes";
+    constexpr static const char* returns = "arkimet.Metadata";
+    constexpr static const char* summary = "Scan a memory buffer";
+    constexpr static const char* doc = R"(
+Returns a Metadata with inline source.
+)";
+    /// Returns the wrapped scan result, or nullptr when argument parsing or the bytes check fails.
+    static PyObject* run(Impl* self, PyObject* args, PyObject* kw)
+    {
+        static const char* kwlist[] = { "data", nullptr };
+        PyObject* arg_data = nullptr;
+        if (!PyArg_ParseTupleAndKeywords(args, kw, "O", const_cast<char**>(kwlist), &arg_data))
+            return nullptr;
+
+        try {
+            if (!PyBytes_Check(arg_data)) {
+                // Format the argument's type object: passing arg_data itself would print the repr of the value
+                PyErr_Format(PyExc_TypeError, "data has type %R instead of bytes", reinterpret_cast<PyObject*>(Py_TYPE(arg_data)));
+                return nullptr;
+            }
+            char* buffer = nullptr;
+            Py_ssize_t length = 0;
+            if (PyBytes_AsStringAndSize(arg_data, &buffer, &length) == -1)
+                throw PythonException();
+
+            // FIXME: memory copy, seems unavoidable at the moment
+            std::vector data(buffer, buffer + length);
+            auto md = self->scanner->scan_data(data);
+            return (PyObject*)metadata_create(md);
+        } ARKI_CATCH_RETURN_PYO
+    }
+};
+
 struct ScannerDef : public Type
 {
     constexpr static const char* name = "Scanner";
@@ -674,9 +711,8 @@ struct ScannerDef : public Type
     constexpr static const char* doc = R"(
 Scanner for binary data.
)"; - GetSetters getsetters; - Methods methods; + GetSetters<> getsetters; + Methods methods; static void _dealloc(Impl* self) { diff --git a/python/tests/test_scan_grib.py b/python/tests/test_scan_grib.py index e052641d..6bdbfd4d 100644 --- a/python/tests/test_scan_grib.py +++ b/python/tests/test_scan_grib.py @@ -42,6 +42,20 @@ def test_scanner(self): scanner = arki.scan.Scanner.get_scanner("GRIB") self.assertEqual(str(scanner), "scanner:grib") + with open("inbound/test.grib1", "rb") as fd: + md = scanner.scan_data(fd.read()) + + self.assertEqual(md["origin"], "GRIB1(200, 000, 101)") + self.assertEqual(md["product"], "GRIB1(200, 140, 229)") + self.assertEqual(md["level"], "GRIB1(001)") + self.assertEqual(md["timerange"], "GRIB1(000, 000h)") + self.assertEqual( + md["area"], + "GRIB(Ni=97, Nj=73, latfirst=40000000, latlast=46000000, lonfirst=12000000, lonlast=20000000, type=0)") + self.assertEqual(md["proddef"], "GRIB(tod=1)") + self.assertEqual(md["reftime"], "2007-07-08T13:00:00Z") + self.assertEqual(md["run"], "MINUTE(13:00)") + def test_compact(self): """ Scan a well-known grib file, with no padding between messages