-
Notifications
You must be signed in to change notification settings - Fork 19
Add scoped event-time director models #249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
3938176
0318344
83ea73e
8c2a44a
c22b968
d5f784d
e937968
9ab9deb
aafc098
aae0025
47a8dec
74f89a5
26f7cd2
3fa3f8c
85ca8c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,195 @@ | ||
| # CI workflow for the ZMQML hybrid tests. | ||
| name: ZMQML Hybrid | ||
|
|
||
| # Triggered by pull requests that target master and by pushes to master. | ||
| on: | ||
| pull_request: | ||
| branches: | ||
| - master | ||
| push: | ||
| branches: | ||
| - master | ||
|
|
||
| # Workflow-wide settings shared by the steps below. | ||
| # ROSS_REF: commit used as the ref of the ROSS checkout step. | ||
| # ZMQML_IMAGE: container image pulled once and used by every docker run in the job. | ||
| # NOTE(review): the image uses the floating "latest" tag while ROSS is pinned to a commit - confirm this is intended. | ||
| env: | ||
| ROSS_REF: 9b6ccb18f9b9db438bf41b5b221d0ef16a4dac48 | ||
| ZMQML_IMAGE: ghcr.io/codes-org/codes-ci-full:latest | ||
|
|
||
| jobs: | ||
| zmqml-hybrid: | ||
| name: zmqml hybrid workflows | ||
| runs-on: ubuntu-24.04 | ||
|
|
||
| steps: | ||
| # Check out the CODES sources into ./codes; later steps mount that directory at /work/codes. | ||
| - name: Checkout CODES | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| path: codes | ||
|
|
||
| # Check out ROSS-org/ROSS at the commit pinned in env.ROSS_REF into ./ross. | ||
| - name: Checkout ROSS | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| repository: ROSS-org/ROSS | ||
| ref: ${{ env.ROSS_REF }} | ||
| path: ross | ||
|
|
||
| # Pull the image named by ZMQML_IMAGE once, before the docker run steps that use it. | ||
| - name: Pull full dependency image | ||
| run: docker pull "$ZMQML_IMAGE" | ||
|
|
||
| # Dedicated network joined by the server, build and test containers; the tests address the server as zmqml-server. | ||
| - name: Create Docker network | ||
| run: docker network create codes-zmqml-ci | ||
|
|
||
| # Launch the ZMQML server in a detached container named zmqml-server on the CI network. | ||
| # The server runs from the CODES checkout and writes its record CSVs to the mounted artifacts directory. | ||
| - name: Start ZMQML server container | ||
| run: | | ||
| # Host directory that is mounted at /work/zmqml-artifacts inside the container. | ||
| mkdir -p "$PWD/zmqml-artifacts" | ||
|
|
||
| docker run -d \ | ||
| --name zmqml-server \ | ||
| --network codes-zmqml-ci \ | ||
| -v "$PWD/codes:/work/codes" \ | ||
| -v "$PWD/zmqml-artifacts:/work/zmqml-artifacts" \ | ||
| -w /work/codes/src/surrogate/zmqml \ | ||
| -e ZMQML_ITERATION_HISTORY_LEN=2 \ | ||
| -e ZMQML_ITERATION_HORIZON=3 \ | ||
| -e ZMQML_ITERATION_TRAIN_STRIDE=1 \ | ||
| -e ZMQML_EVENT_TIME_MIN_ROWS=4 \ | ||
| -e ZMQML_EVENT_TIME_EPOCHS=2 \ | ||
| -e ZMQML_RECORD_LOG_PATH=/work/zmqml-artifacts/iteration-records.csv \ | ||
| -e ZMQML_EVENT_TIME_RECORD_LOG_PATH=/work/zmqml-artifacts/event-time-records.csv \ | ||
| "$ZMQML_IMAGE" \ | ||
| bash -euxo pipefail -c ' | ||
| apt-get update | ||
| apt-get install -y python3-zmq python3-numpy python3-sklearn python3-pandas python3-pip gettext-base | ||
|
|
||
| # Install the CPU-only torch wheel with pip only when torch is not already importable. | ||
| python3 -c "import importlib.util, subprocess, sys; sys.exit(0) if importlib.util.find_spec(\"torch\") else subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"--break-system-packages\", \"torch\", \"--index-url\", \"https://download.pytorch.org/whl/cpu\"])" | ||
|
|
||
| # exec replaces the shell so the server becomes the main process of the container. | ||
| exec python3 -u zmqmlserver.py | ||
| ' | ||
|
|
||
| sleep 5 | ||
| docker ps --filter name=zmqml-server | ||
| docker logs zmqml-server | ||
|
|
||
| # docker ps and docker logs exit 0 even when the container has already died, | ||
| # so check explicitly that it is still running and fail early otherwise. | ||
| if [ -z "$(docker ps -q --filter name=zmqml-server --filter status=running)" ]; then | ||
| echo "::error::zmqml-server container is not running after startup" | ||
| exit 1 | ||
| fi | ||
|
|
||
| # Build and install ROSS, then configure and build CODES with the ZMQML hybrid tests enabled. | ||
| # Everything runs in a throwaway container; the results stay in the mounted host directories. | ||
| - name: Build ROSS and CODES with ZMQML | ||
| run: | | ||
| docker run --rm \ | ||
| --name codes-zmqml-build \ | ||
| --network codes-zmqml-ci \ | ||
| -v "$PWD/codes:/work/codes" \ | ||
| -v "$PWD/ross:/work/ross" \ | ||
| -v "$PWD/ross-install:/work/ross-install" \ | ||
| -w /work \ | ||
| "$ZMQML_IMAGE" \ | ||
| bash -euxo pipefail -c ' | ||
| apt-get update | ||
| apt-get install -y python3-zmq python3-numpy python3-sklearn python3-pandas python3-pip gettext-base | ||
| # Install the CPU-only torch wheel with pip only when torch is not already importable. | ||
| python3 -c "import importlib.util, subprocess, sys; sys.exit(0) if importlib.util.find_spec(\"torch\") else subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"--break-system-packages\", \"torch\", \"--index-url\", \"https://download.pytorch.org/whl/cpu\"])" | ||
|
|
||
| # Configure ROSS (Debug, models enabled) with /work/ross-install as the install prefix. | ||
| cmake -S ross -B ross/build -G Ninja \ | ||
| -DCMAKE_BUILD_TYPE=Debug \ | ||
| -DROSS_BUILD_MODELS=ON \ | ||
| -DCMAKE_INSTALL_PREFIX=/work/ross-install | ||
|
|
||
| cmake --build ross/build --target install -j | ||
|
|
||
| # Start CODES from an empty build directory. | ||
| cd /work/codes | ||
| rm -rf build | ||
|
|
||
| # Configure CODES against the installed ROSS; Torch_DIR is derived from the cmake prefix path reported by the torch Python package. | ||
| cmake -S . -B build -G Ninja \ | ||
| -DCMAKE_BUILD_TYPE=Debug \ | ||
| -DBUILD_TESTING=ON \ | ||
| -DCODES_USE_SWM=ON \ | ||
| -DCODES_USE_TORCH=ON \ | ||
| -DCODES_USE_ZEROMQ=ON \ | ||
| -DCODES_ENABLE_ZMQML_HYBRID_TESTS=ON \ | ||
| -DCMAKE_C_COMPILER=mpicc \ | ||
| -DCMAKE_CXX_COMPILER=mpicxx \ | ||
| -DCMAKE_PREFIX_PATH="/work/ross-install;/opt/swm;/opt/argobots" \ | ||
| -DTorch_DIR="$(python3 -c "import torch; print(torch.utils.cmake_prefix_path)")/Torch" | ||
|
|
||
| cmake --build build -j | ||
| ' | ||
|
|
||
| # Run the two ZMQML hybrid workflow tests from the CODES build tree against the running server container. | ||
| # ZMQML_ENDPOINT addresses the server by its container name on the shared Docker network. | ||
| - name: Run ZMQML hybrid tests | ||
| run: | | ||
| docker run --rm \ | ||
| --name codes-zmqml-tests \ | ||
| --network codes-zmqml-ci \ | ||
| -v "$PWD/codes:/work/codes" \ | ||
| -v "$PWD/zmqml-artifacts:/work/zmqml-artifacts" \ | ||
| -w /work/codes \ | ||
| -e ZMQML_ENDPOINT=tcp://zmqml-server:5555 \ | ||
| -e ZMQML_TEST_NP=1 \ | ||
| -e ZMQML_CTL_TIMEOUT=30 \ | ||
| "$ZMQML_IMAGE" \ | ||
| bash -euxo pipefail -c ' | ||
| apt-get update | ||
| apt-get install -y python3-zmq python3-numpy python3-sklearn python3-pandas python3-pip gettext-base | ||
| # Install the CPU-only torch wheel with pip only when torch is not already importable. | ||
| python3 -c "import importlib.util, subprocess, sys; sys.exit(0) if importlib.util.find_spec(\"torch\") else subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"--break-system-packages\", \"torch\", \"--index-url\", \"https://download.pytorch.org/whl/cpu\"])" | ||
|
|
||
| # List the matching tests without running them (-N) and keep the listing for the check below. | ||
| ctest --test-dir build -N \ | ||
| -R "zmqml-(iteration-time|event-time)-hybrid-workflow.sh" \ | ||
| | tee /tmp/zmqml-ctest-list.txt | ||
|
|
||
| # grep exits non-zero when no hybrid workflow test is registered, which fails the step. | ||
| grep -E "Test #[0-9]+: zmqml-(iteration-time|event-time)-hybrid-workflow.sh" \ | ||
| /tmp/zmqml-ctest-list.txt | ||
|
|
||
| # Run the selected tests with verbose output and a 1200 second per-test timeout. | ||
| ctest --test-dir build \ | ||
| -R "zmqml-(iteration-time|event-time)-hybrid-workflow.sh" \ | ||
| --output-on-failure \ | ||
| --timeout 1200 \ | ||
| -VV | ||
| ' | ||
|
|
||
| # Save the server log and fail unless it shows that records and inference requests | ||
| # of both kinds reached the server, and that both record CSV files were written. | ||
| - name: Validate ZMQML server logs | ||
| run: | | ||
| # A run step without an explicit shell executes with bash -e but without pipefail; | ||
| # enable it so a failing docker logs is not hidden by the exit status of tee. | ||
| set -o pipefail | ||
|
|
||
| mkdir -p "$PWD/zmqml-artifacts" | ||
| docker logs zmqml-server 2>&1 | tee "$PWD/zmqml-artifacts/zmqml-server.log" | ||
|
|
||
| # require_log PATTERN DESCRIPTION | ||
| # Print the lines of the saved server log that match the extended regex PATTERN. | ||
| # When nothing matches, emit an error annotation that names DESCRIPTION and fail the step. | ||
| require_log() { | ||
| local pattern="$1" | ||
| local description="$2" | ||
|
|
||
| if ! grep -nE "$pattern" "$PWD/zmqml-artifacts/zmqml-server.log"; then | ||
| echo "::error::Missing server-side ZMQML evidence: $description" | ||
| exit 1 | ||
| fi | ||
| } | ||
|
|
||
| require_log '\[zmqmlserver\] director_debug_prints=1' \ | ||
| 'simulation configured the server debug flag' | ||
|
|
||
| require_log '\[iteration-time records\]' \ | ||
| 'iteration-time records reached the server' | ||
|
|
||
| require_log '\[iteration-time inference\].*predictions=' \ | ||
| 'iteration-time inference reached the server and returned predictions' | ||
|
|
||
| require_log '\[event-time records\]' \ | ||
| 'event-time records reached the server' | ||
|
|
||
| require_log '\[event-time inference\].*predictions=' \ | ||
| 'event-time inference reached the server and returned predictions' | ||
|
|
||
| # Both record CSV files must exist and be non-empty. | ||
| test -s "$PWD/zmqml-artifacts/iteration-records.csv" | ||
| test -s "$PWD/zmqml-artifacts/event-time-records.csv" | ||
|
|
||
| # Always print the server log, even after an earlier step failed; "|| true" keeps this step from failing. | ||
| - name: Dump ZMQML server logs | ||
| if: always() | ||
| run: docker logs zmqml-server || true | ||
|
|
||
| # Best-effort cleanup: remove the server container first, then the network it was attached to. | ||
| - name: Stop ZMQML server | ||
| if: always() | ||
| run: | | ||
| docker rm -f zmqml-server || true | ||
| docker network rm codes-zmqml-ci || true | ||
|
|
||
| # Always upload the CTest logs, the test output directory and the ZMQML artifacts directory. | ||
| # Paths that do not exist are ignored, and the artifact is kept for 14 days. | ||
| - name: Upload ZMQML logs | ||
| if: always() | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: zmqml-hybrid-logs | ||
| path: | | ||
| codes/build/Testing/Temporary/LastTest.log | ||
| codes/build/Testing/Temporary/LastTestsFailed.log | ||
| codes/build/testing-output/** | ||
| zmqml-artifacts/** | ||
| if-no-files-found: ignore | ||
| retention-days: 14 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -77,6 +77,59 @@ set(ARGOBOTS_PKG_CONFIG_PATH "" CACHE PATH "DEPRECATED: use CMAKE_PREFIX_PATH. W | |
| # dirs, the MPI dependency, and link libraries (linked in src/CMakeLists.txt). | ||
| find_package(ROSS CONFIG REQUIRED) | ||
|
|
||
| # Compatibility for older ROSS CMake package configs. | ||
| # Some ROSS installs provide ROSSConfig.cmake but do not define the modern | ||
| # imported target ROSS::ROSS. This CODES tree links against ROSS::ROSS, so | ||
| # synthesize that target from the installed ROSS prefix when needed. | ||
| # The shim is reported as a deprecation because it is not meant to be kept | ||
| # indefinitely; CMAKE_WARN_DEPRECATED / CMAKE_ERROR_DEPRECATED control how the | ||
| # message is shown. | ||
| if(NOT TARGET ROSS::ROSS) | ||
| message(DEPRECATION "ROSS package did not define ROSS::ROSS; creating compatibility imported target. Support for ROSS installs without the ROSS::ROSS target is deprecated; please upgrade ROSS.") | ||
|
Comment on lines
+84
to
+85
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a CI job (it can be build-only; I don't think it's necessary to run the test suite) that builds CODES with an old ROSS. That way we can be sure this continues to work. Also change the warning message to a deprecation message (see elsewhere in this file where deprecation is used), because I don't think we should keep this around forever. |
||
|
|
||
| # Collect the directories to search. ROSS_DIR is the directory that holds | ||
| # ROSSConfig.cmake. | ||
| if(DEFINED ROSS_DIR) | ||
| get_filename_component(_ROSS_CONFIG_DIR "${ROSS_DIR}" ABSOLUTE) | ||
|
|
||
| # In this install layout, ROSSConfig.cmake is under: | ||
| # <prefix>/lib/ROSSConfig.cmake | ||
| # so the prefix is one directory above ROSS_DIR. | ||
| get_filename_component(_ROSS_PREFIX "${_ROSS_CONFIG_DIR}/.." ABSOLUTE) | ||
|
|
||
| set(_ROSS_COMPAT_LIBRARY_DIRS "${_ROSS_PREFIX}/lib" "${_ROSS_CONFIG_DIR}") | ||
| set(_ROSS_COMPAT_INCLUDE_DIRS "${_ROSS_PREFIX}/include") | ||
| else() | ||
| # No config directory is known, so no prefix can be derived. Deriving one | ||
| # from an empty path would resolve "/.." to the filesystem root and search | ||
| # /lib and /include. | ||
| set(_ROSS_CONFIG_DIR "") | ||
| set(_ROSS_PREFIX "") | ||
| set(_ROSS_COMPAT_LIBRARY_DIRS "") | ||
| set(_ROSS_COMPAT_INCLUDE_DIRS "") | ||
| endif() | ||
|
|
||
| # Header fallbacks: a ROSS source checkout next to this tree or under $HOME. | ||
| list(APPEND _ROSS_COMPAT_INCLUDE_DIRS | ||
| "${CMAKE_CURRENT_SOURCE_DIR}/../ross/core" | ||
| "$ENV{HOME}/ross/core" | ||
| ) | ||
|
|
||
| # NO_DEFAULT_PATH restricts both searches to the directories listed above. | ||
| find_library(_ROSS_COMPAT_LIBRARY | ||
| NAMES ROSS ross | ||
| PATHS ${_ROSS_COMPAT_LIBRARY_DIRS} | ||
| NO_DEFAULT_PATH | ||
| ) | ||
|
|
||
| find_path(_ROSS_COMPAT_INCLUDE_DIR | ||
| NAMES ross.h | ||
| PATHS ${_ROSS_COMPAT_INCLUDE_DIRS} | ||
| NO_DEFAULT_PATH | ||
| ) | ||
|
|
||
| # Report every directory that was actually searched. | ||
| if(NOT _ROSS_COMPAT_LIBRARY) | ||
| string(REPLACE ";" ", " _ROSS_COMPAT_SEARCHED "${_ROSS_COMPAT_LIBRARY_DIRS}") | ||
| message(FATAL_ERROR "Could not locate ROSS library for compatibility target. Checked: ${_ROSS_COMPAT_SEARCHED}") | ||
| endif() | ||
|
|
||
| if(NOT _ROSS_COMPAT_INCLUDE_DIR) | ||
| string(REPLACE ";" ", " _ROSS_COMPAT_SEARCHED "${_ROSS_COMPAT_INCLUDE_DIRS}") | ||
| message(FATAL_ERROR "Could not locate ross.h for compatibility target. Checked: ${_ROSS_COMPAT_SEARCHED}") | ||
| endif() | ||
|
|
||
| # The find results are cache entries; keep these internal names out of the | ||
| # default cache views. | ||
| mark_as_advanced(_ROSS_COMPAT_LIBRARY _ROSS_COMPAT_INCLUDE_DIR) | ||
|
|
||
| # NOTE(review): the synthesized target carries only the library location and | ||
| # the include directory. The comment above find_package(ROSS) suggests the | ||
| # real ROSS target also carries the MPI dependency - confirm whether the | ||
| # compatibility target needs it as well. | ||
| add_library(ROSS::ROSS UNKNOWN IMPORTED) | ||
| set_target_properties(ROSS::ROSS PROPERTIES | ||
| IMPORTED_LOCATION "${_ROSS_COMPAT_LIBRARY}" | ||
| INTERFACE_INCLUDE_DIRECTORIES "${_ROSS_COMPAT_INCLUDE_DIR}" | ||
| ) | ||
|
|
||
| message(STATUS "Using compatibility ROSS::ROSS library: ${_ROSS_COMPAT_LIBRARY}") | ||
| message(STATUS "Using compatibility ROSS::ROSS include dir: ${_ROSS_COMPAT_INCLUDE_DIR}") | ||
| endif() | ||
|
|
||
| # PkgConfig discovers the optional SWM/UNION/ARGOBOTS deps below (as imported | ||
| # targets). The recommended way to point at a non-standard install is | ||
| # CMAKE_PREFIX_PATH (pkg_check_modules searches <prefix>/lib/pkgconfig etc. under | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With #255 updating the CI image, this is no longer needed; the same goes for the other places where this is done. In the future, we should update the Docker image with dependencies like this instead of installing them in every job. Just note that it has to be done in a separate PR, because the job that builds the Docker image runs only on pushes to master that change the Dockerfile.