diff --git a/README.md b/README.md index 00437df..2d8f99a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Python Streaming Data Types + Utilities for working with the FlatBuffers schemas used at the European Spallation Source ERIC for data transport. @@ -7,7 +8,7 @@ https://github.com/ess-dmsc/streaming-data-types ## FlatBuffer Schemas | name | description | -|------|------------------------------------------------------------------------------| +| ---- | ---------------------------------------------------------------------------- | | hs00 | Histogram schema (deprecated in favour of hs01) | | hs01 | Histogram schema | | ns10 | NICOS cache entry schema | @@ -34,8 +35,10 @@ https://github.com/ess-dmsc/streaming-data-types | json | Generic JSON data | | se00 | Arrays with optional timestamps, for example waveform data. Replaces _senv_. | | da00 | Scipp-like data arrays, for histograms, etc. | +| vs00 | Log data à la f144, but for strings | ### hs00 and hs01 + Schema for histogram data. It is one of the more complicated to use schemas. It takes a Python dictionary as its input; this dictionary needs to have correctly named fields. @@ -43,6 +46,7 @@ named fields. The input histogram data for serialisation and the output deserialisation data have the same dictionary "layout". Example for a 2-D histogram: + ```json hist = { "source": "some_source", @@ -68,10 +72,10 @@ hist = { "info": "info_string", } ``` + The arrays passed in for `data`, `errors` and `bin_boundaries` can be NumPy arrays or regular lists, but on deserialisation they will be NumPy arrays. - ## Developer documentation See [README_DEV.md](README_DEV.md) diff --git a/README_DEV.md b/README_DEV.md index 7f826d0..09fbe3d 100644 --- a/README_DEV.md +++ b/README_DEV.md @@ -1,45 +1,57 @@ # Python Streaming Data Types + ## For developers ### Install the commit hooks (important) + There are commit hooks for Black and Flake8. The commit hooks are handled using [pre-commit](https://pre-commit.com). To install the hooks for this project run: + ``` pre-commit install ``` To test the hooks run: + ``` pre-commit run --all-files ``` + This command can also be used to run the hooks manually. ### Adding new schemas checklist (important) -* Generate Python bindings for the schema using FlatBuffers' `flatc` executable -* Add the generated bindings to the project -* Add unit-tests (see existing tests for an example) -* Update `fbschemas.__init__.py` to include the new serialiser and deserialiser -* Check whether the serialised data produced by the new code can be verified in C++ - * There is a helper program in the [FlatBufferVerification](https://github.com/ess-dmsc/FlatBufferVerification) repository - * Don't worry if it fails verification - it seems to be an inherent FlatBuffers issue + +- Generate Python bindings for the schema using FlatBuffers' `flatc` executable + e.g. `flatc --python -o streaming_data_types/name_xx##.py ../streaming-data-types/schemas/xx##_name.fbs` + `flatc` comes from the `flatbuffers` package, so make sure you have that installed first +- Add the generated bindings to the project +- Add unit-tests (see existing tests for an example) +- Update `fbschemas.__init__.py` to include the new serialiser and deserialiser +- Check whether the serialised data produced by the new code can be verified in C++ + - There is a helper program in the [FlatBufferVerification](https://github.com/ess-dmsc/FlatBufferVerification) repository + - Don't worry if it fails verification - it seems to be an inherent FlatBuffers issue ### Tox + Tox allows the unit tests to be run against multiple versions of Python. See the tox.ini file for which versions are supported. From the top directory: + ``` tox ``` ### Installing the development version locally + First, uninstall any existing versions of the Python streaming data types package: ``` pip uninstall ess-streaming-data-types ``` + Then, from the _python-streaming-data-types_ root directory, run the following command: ``` @@ -49,52 +61,63 @@ pip install --user -e ./ ### Building the package locally and deploying it to PyPI #### Requirements -* A [PyPi](https://pypi.org/) account -* A [TestPyPi](https://test.pypi.org/) account (this is separate to the PyPi account) -* Permission to push to the ess-streaming-data-types project on TestPyPi and PyPi -* Installed all requirements in `requirements-dev.txt` + +- A [PyPi](https://pypi.org/) account +- A [TestPyPi](https://test.pypi.org/) account (this is separate to the PyPi account) +- Permission to push to the ess-streaming-data-types project on TestPyPi and PyPi +- Installed all requirements in `requirements-dev.txt` #### Steps -***First update the __version__ number in streaming_data_types/__init__.py and push the update to the repository.*** +**\_First update the **version** number in streaming*data_types/\_version.py and push the update to the repository.*** Uninstall streaming_data_types if you have previously installed it from PyPi: + ``` pip uninstall ess_streaming_data_types ``` Delete any old builds you may have (IMPORTANT!): + ``` rm -rf build dist ``` Build it locally: + ``` python setup.py sdist bdist_wheel ``` Check dist files: + ``` twine check dist/* ``` Push to test.pypi.org for testing: + ``` -twine upload --repository-url https://test.pypi.org/legacy/ dist/* +twine upload --repository-url https://test.pypi.org/legacy/ dist/* ``` The new module can then be installed from test.pypi.org like so: + ``` pip uninstall ess-streaming-data-types # Remove old version if present pip install -i https://test.pypi.org/simple/ ess-streaming-data-types ``` + Unfortunately, flatbuffers is not on test.pypi.org so the following error may occur: + ``` ERROR: Could not find a version that satisfies the requirement flatbuffers ``` + The workaround is to install flatbuffers manually first using `pip install flatbuffers` and then rerun the previous command. Test the module using the existing test-suite (from project root): + ``` rm -rf streaming_data_types # Rename the local source directory pytest # The tests will be run against the pip installed module @@ -102,7 +125,9 @@ git reset --hard origin/main # Put everything back to before ``` After testing installing from test.pypi.org works, push to PyPI: + ``` twine upload dist/* ``` + Finally, create a tag on the GitHub repository with the appropriate name, e.g. `v0.7.0`. diff --git a/requirements-dev.txt b/requirements-dev.txt index 05210d8..89421c6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,10 +1,11 @@ -r requirements.txt black flake8 +id<1.6 # required by twine but stuck by conan urllib3 requirement isort pre-commit pytest -tox==3.27.1 # tox 4 seems to be broken at the moment +tox tox-pyenv twine wheel diff --git a/setup.py b/setup.py index 96bcc1f..1a7e82b 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,9 @@ from setuptools import find_packages, setup -from streaming_data_types._version import version +version = {} +with open("streaming_data_types/_version.py") as f: + exec(f.read(), version) DESCRIPTION = "Python utilities for handling ESS streamed data" @@ -18,7 +20,7 @@ setup( name="ess_streaming_data_types", - version=version, + version=version["version"], description=DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", diff --git a/streaming_data_types/__init__.py b/streaming_data_types/__init__.py index 52e29d9..01acb6f 100644 --- a/streaming_data_types/__init__.py +++ b/streaming_data_types/__init__.py @@ -38,69 +38,72 @@ serialise_senv, ) from streaming_data_types.status_x5f2 import deserialise_x5f2, serialise_x5f2 +from streaming_data_types.stringdata_vs00 import deserialise_vs00, serialise_vs00 from streaming_data_types.timestamps_tdct import deserialise_tdct, serialise_tdct __version__ = version SERIALISERS = { + "6s4t": serialise_6s4t, + "ad00": serialise_ad00, + "ADAr": serialise_ADAr, + "al00": serialise_al00, "an44": serialise_an44, + "answ": serialise_answ, + "ar51": serialise_ar51, + "da00": serialise_da00, + "ep00": serialise_ep00, + "ep01": serialise_ep01, "ev42": serialise_ev42, "ev43": serialise_ev43, "ev44": serialise_ev44, - "hs00": serialise_hs00, - "hs01": serialise_hs01, "f142": serialise_f142, "f144": serialise_f144, + "fc00": serialise_fc00, + "hs00": serialise_hs00, + "hs01": serialise_hs01, + "json": serialise_json, + "NDAr": serialise_ndar, "ns10": serialise_ns10, "pl72": serialise_pl72, - "6s4t": serialise_6s4t, - "x5f2": serialise_x5f2, - "ep00": serialise_ep00, - "ep01": serialise_ep01, - "tdct": serialise_tdct, "rf5k": serialise_rf5k, - "fc00": serialise_fc00, - "answ": serialise_answ, - "wrdn": serialise_wrdn, - "NDAr": serialise_ndar, - "senv": serialise_senv, "se00": serialise_se00, - "ADAr": serialise_ADAr, - "al00": serialise_al00, - "json": serialise_json, - "ad00": serialise_ad00, - "da00": serialise_da00, - "ar51": serialise_ar51, + "senv": serialise_senv, + "tdct": serialise_tdct, + "vs00": serialise_vs00, + "wrdn": serialise_wrdn, + "x5f2": serialise_x5f2, } DESERIALISERS = { + "6s4t": deserialise_6s4t, + "ad00": deserialise_ad00, + "ADAr": deserialise_ADAr, + "al00": deserialise_al00, "an44": deserialise_an44, + "answ": deserialise_answ, + "ar51": deserialise_ar51, + "da00": deserialise_da00, + "ep00": deserialise_ep00, + "ep01": deserialise_ep01, "ev42": deserialise_ev42, "ev43": deserialise_ev43, "ev44": deserialise_ev44, - "hs00": deserialise_hs00, - "hs01": deserialise_hs01, "f142": deserialise_f142, "f144": deserialise_f144, + "fc00": deserialise_fc00, + "hs00": deserialise_hs00, + "hs01": deserialise_hs01, + "json": deserialise_json, + "NDAr": deserialise_ndar, "ns10": deserialise_ns10, "pl72": deserialise_pl72, - "6s4t": deserialise_6s4t, - "x5f2": deserialise_x5f2, - "ep00": deserialise_ep00, - "ep01": deserialise_ep01, - "tdct": deserialise_tdct, "rf5k": deserialise_rf5k, - "fc00": deserialise_fc00, - "answ": deserialise_answ, - "wrdn": deserialise_wrdn, - "NDAr": deserialise_ndar, - "senv": deserialise_senv, "se00": deserialise_se00, - "ADAr": deserialise_ADAr, - "al00": deserialise_al00, - "json": deserialise_json, - "ad00": deserialise_ad00, - "da00": deserialise_da00, - "ar51": deserialise_ar51, + "senv": deserialise_senv, + "tdct": deserialise_tdct, + "vs00": deserialise_vs00, + "wrdn": deserialise_wrdn, + "x5f2": deserialise_x5f2, } diff --git a/streaming_data_types/_version.py b/streaming_data_types/_version.py index 83ee1a0..caf1331 100644 --- a/streaming_data_types/_version.py +++ b/streaming_data_types/_version.py @@ -1,4 +1,4 @@ # Version is not directly defined in __init__ because that causes all # run time dependencies to become build-time dependencies when it is # imported in setup.py -version = "0.27.0" +version = "0.28.0" diff --git a/streaming_data_types/fbschemas/stringdata_vs00/stringdata_vs00.py b/streaming_data_types/fbschemas/stringdata_vs00/stringdata_vs00.py new file mode 100644 index 0000000..26c7bf9 --- /dev/null +++ b/streaming_data_types/fbschemas/stringdata_vs00/stringdata_vs00.py @@ -0,0 +1,122 @@ +# automatically generated by the FlatBuffers compiler, do not modify + +# namespace: + +import flatbuffers +from flatbuffers.compat import import_numpy + +np = import_numpy() + + +class vs00_StringData(object): + __slots__ = ["_tab"] + + @classmethod + def GetRootAs(cls, buf, offset=0): + n = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset) + x = vs00_StringData() + x.Init(buf, n + offset) + return x + + @classmethod + def GetRootAsvs00_StringData(cls, buf, offset=0): + """This method is deprecated. Please switch to GetRootAs.""" + return cls.GetRootAs(buf, offset) + + @classmethod + def vs00_StringDataBufferHasIdentifier(cls, buf, offset, size_prefixed=False): + return flatbuffers.util.BufferHasIdentifier( + buf, offset, b"\x76\x73\x30\x30", size_prefixed=size_prefixed + ) + + # vs00_StringData + def Init(self, buf, pos): + self._tab = flatbuffers.table.Table(buf, pos) + + # vs00_StringData + def SourceName(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4)) + if o != 0: + return self._tab.String(o + self._tab.Pos) + return None + + # vs00_StringData + def Timestamp(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6)) + if o != 0: + return self._tab.Get(flatbuffers.number_types.Int64Flags, o + self._tab.Pos) + return 0 + + # vs00_StringData + def Data(self, j): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8)) + if o != 0: + a = self._tab.Vector(o) + return self._tab.String( + a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4) + ) + return "" + + # vs00_StringData + def DataLength(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8)) + if o != 0: + return self._tab.VectorLen(o) + return 0 + + # vs00_StringData + def DataIsNone(self): + o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8)) + return o == 0 + + +def vs00_StringDataStart(builder): + builder.StartObject(3) + + +def Start(builder): + vs00_StringDataStart(builder) + + +def vs00_StringDataAddSourceName(builder, sourceName): + builder.PrependUOffsetTRelativeSlot( + 0, flatbuffers.number_types.UOffsetTFlags.py_type(sourceName), 0 + ) + + +def AddSourceName(builder, sourceName): + vs00_StringDataAddSourceName(builder, sourceName) + + +def vs00_StringDataAddTimestamp(builder, timestamp): + builder.PrependInt64Slot(1, timestamp, 0) + + +def AddTimestamp(builder, timestamp): + vs00_StringDataAddTimestamp(builder, timestamp) + + +def vs00_StringDataAddData(builder, data): + builder.PrependUOffsetTRelativeSlot( + 2, flatbuffers.number_types.UOffsetTFlags.py_type(data), 0 + ) + + +def AddData(builder, data): + vs00_StringDataAddData(builder, data) + + +def vs00_StringDataStartDataVector(builder, numElems): + return builder.StartVector(4, numElems, 4) + + +def StartDataVector(builder, numElems): + return vs00_StringDataStartDataVector(builder, numElems) + + +def vs00_StringDataEnd(builder): + return builder.EndObject() + + +def End(builder): + return vs00_StringDataEnd(builder) diff --git a/streaming_data_types/stringdata_vs00.py b/streaming_data_types/stringdata_vs00.py new file mode 100644 index 0000000..2959b09 --- /dev/null +++ b/streaming_data_types/stringdata_vs00.py @@ -0,0 +1,71 @@ +from collections import namedtuple + +import flatbuffers + +import streaming_data_types.fbschemas.stringdata_vs00.stringdata_vs00 as vs00Message +from streaming_data_types.utils import check_schema_identifier + +FILE_IDENTIFIER = b"vs00" + + +StringData = namedtuple( + "StringData", + ("source_name", "timestamp", "data"), +) + + +def deserialise_vs00(buffer): + """ + Deserialise FlatBuffer vs00. + + :param buffer: The FlatBuffers buffer. + :return: The deserialised data. + """ + check_schema_identifier(buffer, FILE_IDENTIFIER) + + event = vs00Message.vs00_StringData.GetRootAs(buffer, 0) + + return StringData( + event.SourceName().decode("utf-8"), + event.Timestamp(), + [event.Data(i).decode("utf-8") for i in range(event.DataLength())], + ) + + +def serialise_vs00( + source_name: str, + timestamp: int = 0, + data: list[str] = None, +) -> bytes: + """ + Serialise string data as a vs00 FlatBuffers message. + + :param source_name: + :param timestamp: + :param data: + :return: + """ + if data is None: + data = [""] + + builder = flatbuffers.Builder(1024) + source = builder.CreateString(source_name) + string_offsets = [builder.CreateString(s) for s in data] + vs00Message.vs00_StringDataStartDataVector( + builder, + len(string_offsets), + ) + for offset in reversed(string_offsets): + builder.PrependUOffsetTRelative(offset) + + data_vector = builder.EndVector() + + vs00Message.vs00_StringDataStart(builder) + vs00Message.vs00_StringDataAddSourceName(builder, source) + vs00Message.vs00_StringDataAddTimestamp(builder, timestamp) + vs00Message.vs00_StringDataAddData(builder, data_vector) + + end = vs00Message.vs00_StringDataEnd(builder) + builder.Finish(end, file_identifier=FILE_IDENTIFIER) + + return bytes(builder.Output())