diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index d98352b4..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index 63357d2a..76a13d8f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,7 @@ __pycache__ dist poetry.lock -local/ \ No newline at end of file +local/ +third_party/ + +.DS_Store \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9707c9ae..bc6ee2df 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,8 +57,4 @@ Emails: - - - - diff --git a/README.md b/README.md index ca8c6a62..d7e0de74 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f LiDAR - Rellis3D, GOOSE, custom GAIA format - PyTorch (tested with RandLA-Net and KPConv from Open3D-ML) + Rellis3D, GOOSE, WildScenes, custom GAIA format + PyTorch (tested with Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models) Object detection Image @@ -94,6 +94,9 @@ Install your deep learning framework of preference in your environment. We have If you are using LiDAR, Open3D currently requires `torch==2.2*`. +### Additional environments +Some LiDAR segmentation models, such as SphereFormer and LSK3DNet, require a dedicated installation workflow. Refer to [additional_envs/INSTRUCTIONS.md](additional_envs/INSTRUCTIONS.md) for detailed setup instructions. + # Usage DetectionMetrics can be used in three ways: through the **interactive GUI** (detection only), as a **Python library**, or via the **command-line interface** (segmentation and detection). @@ -166,8 +169,9 @@ Our previous release, ***DetectionMetrics v1***, introduced a versatile suite fo doi = {10.3390/s22124575}, } ``` + # How to Contribute _To make your first contribution, follow this [Guide](https://github.com/JdeRobot/DetectionMetrics/blob/master/CONTRIBUTING.md)._ # Acknowledgements -Utils for LiDAR segmentation, such as sampling or recentering, are based on [Open3D-ML](https://github.com/isl-org/Open3D-ML). +LiDAR segmentation support is built upon open-source work from [Open3D-ML](https://github.com/isl-org/Open3D-ML), [mmdetection3d](https://github.com/open-mmlab/mmdetection3d), [SphereFormer](https://github.com/dvlab-research/SphereFormer), and [LSK3DNet](https://github.com/FengZicai/LSK3DNet). diff --git a/THIRD_PARTY_NOTICES.md b/THIRD_PARTY_NOTICES.md new file mode 100644 index 00000000..934161fa --- /dev/null +++ b/THIRD_PARTY_NOTICES.md @@ -0,0 +1,178 @@ +This project includes third-party software components. +The following licenses and notices apply to the indicated components. + +--- + +## Third-Party Components + +### SphereFormer +This project includes modified code derived from [SphereFormer](https://github.com/dvlab-research/SphereFormer), licensed under the Apache License, Version 2.0. + +### LSK3DNet +This project includes modified code derived from [LSK3DNet](https://github.com/FengZicai/LSK3DNet), licensed under the MIT License. + +### Open3D-ML +This project includes modified code derived from [Open3D-ML](https://github.com/isl-org/Open3D-ML), licensed under the MIT License. + +--- + +## Apache License, Version 2.0 (SphereFormer) + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity alleging that the Work + or a Contribution incorporated within the Work constitutes patent + infringement, then any patent licenses granted to You under this + License for that Work shall terminate as of the date such litigation + is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works. + + END OF TERMS AND CONDITIONS + +--- + +## MIT License (LSK3DNet) + +MIT License + +Copyright (c) 2024 Tuo Feng + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--- + +## MIT License (Open3D-ML) + +The MIT License (MIT) + +Open3D: www.open3d.org +Copyright (c) 2020 www.open3d.org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/additional_envs/INSTRUCTIONS.md b/additional_envs/INSTRUCTIONS.md new file mode 100644 index 00000000..c45e8d49 --- /dev/null +++ b/additional_envs/INSTRUCTIONS.md @@ -0,0 +1,201 @@ +# Additional environments + +Some LiDAR segmentation backends require a dedicated Python version and separate virtual environment. +These setups are not compatible with the default installation workflow. + +--- + +## MMDetection3D + +### Python version +- **Python 3.10** (recommended) + +### Create and activate a virtual environment +```bash +python3.10 -m venv .venv-mmdet3d +source .venv-mmdet3d/bin/activate +python -m pip install -U pip setuptools wheel +``` + +### Install dependencies (CUDA 11.7) +```bash +pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 +pip install openmim +mim install mmengine +pip install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu117/torch2.0/index.html +mim install "mmdet>=3.0.0" +mim install "mmdet3d>=1.1.0" +``` + +### Install TorchSparse + +#### Option A (with sudo) +```bash +sudo apt update +sudo apt install -y gcc-11 g++-11 nvidia-cuda-toolkit python3.10-dev +sudo apt install -y libsparsehash-dev + +export CC=/usr/bin/gcc-11 +export CXX=/usr/bin/g++-11 +export FORCE_CUDA=1 +pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0 +``` + +#### Option B (no sudo / Option A fails) + +##### 1) Install CUDA 11.7 locally +```bash +wget https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run +chmod +x cuda_11.7.0_515.43.04_linux.run + +mkdir -p "$HOME/cuda-11.7" +./cuda_11.7.0_515.43.04_linux.run \ + --toolkit --override \ + --installpath="$HOME/cuda-11.7" + +export CUDA_HOME="$HOME/cuda-11.7" +export PATH="$CUDA_HOME/bin:$PATH" +export LD_LIBRARY_PATH="$CUDA_HOME/lib64:$LD_LIBRARY_PATH" +``` + +##### 2) Install Google's SparseHash locally +```bash +PREFIX=$HOME/local +mkdir -p "$PREFIX" && cd /tmp +wget -q https://github.com/sparsehash/sparsehash/archive/refs/tags/sparsehash-2.0.4.tar.gz +tar xzf sparsehash-2.0.4.tar.gz +cd sparsehash-sparsehash-2.0.4 +./configure --prefix="$PREFIX" +make -j"$(nproc)" && make install # headers land in $PREFIX/include/google/ +export CPLUS_INCLUDE_PATH="$PREFIX/include:$CPLUS_INCLUDE_PATH" +``` + +##### 3) Install TorchSparse +```bash +export FORCE_CUDA=1 +pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0 +``` + +--- + +## SphereFormer + +### Python version +- **Python 3.7** (required) + +### Create and activate a virtual environment +```bash +python3.7 -m venv .venv-sphereformer +source .venv-sphereformer/bin/activate +python -m pip install -U pip setuptools wheel +``` + +### Install dependencies +```bash +pip install typing-extensions==4.7.1 + +pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 \ + -f https://download.pytorch.org/whl/torch_stable.html + +pip install torch_scatter==2.0.9 +pip install torch_geometric==1.7.2 +pip install spconv-cu114==2.1.25 +pip install torch_sparse==0.6.12 cumm-cu114==0.2.8 torch_cluster==1.5.9 + +pip install 
safetensors==0.3.3 +pip install tensorboard timm termcolor tensorboardX +``` + +### Clone SphereFormer and build its SparseTransformer +```bash +mkdir -p third_party && cd third_party +git clone https://github.com/dvlab-research/SphereFormer.git +cd SphereFormer/third_party/SparseTransformer +python setup.py install +``` + +### Switch to the SphereFormer-specific pyproject.toml and install DetectionMetrics +Run the following from the repository root: +```bash +cd ../../.. +mv pyproject.toml pyproject-core.toml +cp additional_envs/pyproject-sphereformer.toml pyproject.toml + +pip install -e . +``` + +### Add SphereFormer to PYTHONPATH +Run the following from the repository root: +```bash +export PYTHONPATH="$PYTHONPATH:$(pwd)/third_party/SphereFormer" +``` + +--- + +## LSK3DNet + +### Python version +- **Python 3.9** (required) + +Ensure `python3.9-dev` and `python3.9-distutils` are available. + +### Create and activate a virtual environment +```bash +python3.9 -m venv .venv-lsk3dnet +source .venv-lsk3dnet/bin/activate +python -m pip install -U pip setuptools wheel +``` + +### Install dependencies (CUDA 11.3) +```bash +pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 \ + --extra-index-url https://download.pytorch.org/whl/cu113 + +pip install numpy==1.23.5 +pip install -r additional_envs/requirements-lsk3dnet.txt + +pip install SharedArray==3.2.4 +pip install pybind11 +``` + +### Build LSK3DNet c_utils +```bash +mkdir -p third_party && cd third_party +git clone https://github.com/FengZicai/LSK3DNet.git + +cd LSK3DNet/c_utils +mkdir -p build && cd build + +cmake -DPYTHON_EXECUTABLE="$(which python)" \ + -Dpybind11_DIR="$(python -m pybind11 --cmakedir)" \ + .. + +make +``` + +### Switch to the LSK3DNet-specific pyproject.toml and install DetectionMetrics +Run the following from the repository root: +```bash +cd ../../../.. + +mv pyproject.toml pyproject-core.toml +cp additional_envs/pyproject-lsk3dnet.toml pyproject.toml + +pip install -e . 
+``` + +### Add LSK3DNet to PYTHONPATH +Run the following from the repository root: +```bash +export PYTHONPATH="$PYTHONPATH:$(pwd)/third_party/LSK3DNet:$(pwd)/third_party/LSK3DNet/c_utils/build" +``` + +--- + +## Restore the core repository configuration + +If you switched `pyproject.toml` for a backend-specific installation, restore the default setup from the repository root: + +```bash +mv pyproject-core.toml pyproject.toml +``` diff --git a/additional_envs/pyproject-lsk3dnet.toml b/additional_envs/pyproject-lsk3dnet.toml new file mode 100644 index 00000000..8a367c36 --- /dev/null +++ b/additional_envs/pyproject-lsk3dnet.toml @@ -0,0 +1,46 @@ +[tool.poetry] +name = "detectionmetrics-lsk3dnet" +version = "0.0.0" +description = "LSK3DNet-compatible version of DetectionMetrics" +authors = ["JdeRobot", "d.pascualhe "] +readme = "README.md" +license = "LICENSE" +packages = [ + { include = "detectionmetrics" } +] + +[tool.poetry.dependencies] +python = "^3.9" +tqdm = "^4.65.0" +pandas = "^2.2.3" +PyYAML = "^6.0.2" +pyarrow = "^18.0.0" +pillow = "^11.0.0" +numpy = "1.23.5" +opencv-python-headless = "^4.10.0.84" +scikit-learn = "^1.6.0" +open3d = "^0.19.0" +addict = "^2.4.0" +matplotlib = "^3.6.0" +click = "^8.1.8" +tensorboard = "^2.18.0" + +[tool.poetry.group.dev.dependencies] +black = "^24.10.0" +pylint = "^3.3.1" +ipykernel = "^6.29.5" + +[tool.poetry.group.docs.dependencies] +sphinx = "^8.1.3" +sphinx-rtd-theme = "^3.0.2" + +[tool.poetry.group.test.dependencies] +pytest = "^8.0.0" + +[tool.poetry.scripts] +dm_evaluate = "detectionmetrics.cli.evaluate:evaluate" +dm_batch = "detectionmetrics.cli.batch:batch" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/additional_envs/pyproject-sphereformer.toml b/additional_envs/pyproject-sphereformer.toml new file mode 100644 index 00000000..ba668b94 --- /dev/null +++ b/additional_envs/pyproject-sphereformer.toml @@ -0,0 +1,46 @@ +[tool.poetry] +name = "detectionmetrics-sphereformer" +version = "0.0.0" +description = "SphereFormer-compatible version of DetectionMetrics" +authors = ["JdeRobot", "d.pascualhe "] +readme = "README.md" +license = "LICENSE" +packages = [ + { include = "detectionmetrics" } +] + +[tool.poetry.dependencies] +python = "^3.7" +tqdm = "^4.65.0" +pandas = "^1.3.5" +PyYAML = "^5.4.1" +pyarrow = "^12.0.1" +pillow = "9.5.0" +numpy = "1.21.6" +opencv-python-headless = "^4.5.5.64" +scikit-learn = "^1.0.2" +open3d = "^0.13.0" +addict = "^2.4.0" +matplotlib = "^3.5.0" +click = "^8.0.4" +tensorboard = "^2.6.0" + +[tool.poetry.group.dev.dependencies] +black = "^22.3.0" +pylint = "^2.15.0" +ipykernel = "^6.9.1" + +[tool.poetry.group.docs.dependencies] +sphinx = "^4.5.0" +sphinx-rtd-theme = "^1.0.0" + +[tool.poetry.group.test.dependencies] +pytest = "^6.2.5" + +[tool.poetry.scripts] +dm_evaluate = "detectionmetrics.cli.evaluate:evaluate" +dm_batch = "detectionmetrics.cli.batch:batch" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/additional_envs/requirements-lsk3dnet.txt b/additional_envs/requirements-lsk3dnet.txt new file mode 100644 index 00000000..a4816cda --- /dev/null +++ b/additional_envs/requirements-lsk3dnet.txt @@ -0,0 +1,168 @@ +absl-py==1.4.0 +addict==2.4.0 +aiohttp==3.8.3 +aiosignal==1.3.1 +anyio==3.6.2 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +async-timeout==4.0.2 +asynctest==0.13.0 +attrs==22.2.0 +backcall==0.2.0 +beautifulsoup4==4.11.1 +bleach==5.0.1 +cachetools==5.3.0 +ccimport==0.3.7 +certifi==2024.8.30 
+cffi==1.15.1 +click==8.1.3 +ConfigArgParse==1.5.3 +cumm-cu113==0.2.9 +cycler==0.11.0 +Cython==0.29.32 +dash==2.8.1 +dash-core-components==2.0.0 +dash-html-components==2.0.0 +dash-table==5.0.0 +debugpy==1.6.4 +decorator==5.1.1 +defusedxml==0.7.1 +descartes==1.1.0 +easydict==1.10 +entrypoints==0.4 +fastjsonschema==2.16.3 +filelock==3.9.1 +fire==0.5.0 +Flask==2.2.3 +fonttools==4.38.0 +frozenlist==1.3.3 +fsspec==2022.11.0 +funcy==2.0 +future==0.18.3 +gensim==4.2.0 +google-auth==2.17.1 +google-auth-oauthlib==0.4.6 +grpcio==1.53.0 +huggingface-hub==0.13.2 +idna==3.4 +imageio==2.27.0 +importlib-metadata==5.2.0 +importlib-resources==5.10.2 +ipykernel==6.16.2 +ipython==7.34.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.4 +itsdangerous==2.1.2 +jedi==0.18.2 +Jinja2==3.1.2 +joblib==1.2.0 +jsonschema==4.17.3 +jupyter==1.0.0 +jupyter-console==6.4.4 +jupyter-server==1.23.5 +jupyter_client==7.4.8 +jupyter_core==4.12.0 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.5 +kiwisolver==1.4.4 +lark==1.1.5 +lightning-utilities==0.5.0 +llvmlite==0.39.1 +Markdown==3.4.3 +MarkupSafe==2.1.2 +matplotlib==3.5.3 +matplotlib-inline==0.1.6 +mistune==2.0.4 +multidict==6.0.4 +nbclassic==0.4.8 +nbclient==0.7.2 +nbconvert==7.2.8 +nbformat==5.5.0 +nest-asyncio==1.5.6 +networkx==2.6.3 +ninja==1.11.1 +nltk==3.8.1 +notebook==6.5.2 +notebook_shim==0.2.2 +numba==0.56.4 +numexpr==2.8.4 +nuscenes-devkit==1.1.9 +oauthlib==3.2.2 +open3d==0.16.0 +opencv-python==4.7.0.68 +packaging==23.0 +pandas==1.3.5 +pandocfilters==1.5.0 +parso==0.8.3 +pccm==0.2.21 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==9.4.0 +pkgutil_resolve_name==1.3.10 +plotly==5.13.1 +plyfile==0.8.1 +portalocker==2.6.0 +prometheus-client==0.15.0 +prompt-toolkit==3.0.36 +protobuf==3.20.1 +psutil==5.9.4 +ptyprocess==0.7.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pybind11==2.10.2 +pycocotools==2.0.6 +pycparser==2.21 +pyDeprecate==0.3.0 +Pygments==2.14.0 +pyparsing==3.0.9 +pyquaternion==0.9.9 +pyrsistent==0.19.3 +python-dateutil==2.8.2 +pytz==2022.7 +PyWavelets==1.3.0 +PyYAML==6.0 +pyzmq==24.0.1 +qtconsole==5.4.0 +QtPy==2.3.0 +requests==2.28.2 +requests-oauthlib==1.3.1 +rsa==4.9 +scikit-image==0.19.3 +scikit-learn==1.0.2 +scipy==1.7.3 +seaborn==0.12.2 +Send2Trash==1.8.0 +shapely==2.0.0 +six==1.16.0 +smart-open==6.3.0 +sniffio==1.3.0 +soupsieve==2.3.2.post1 +spconv-cu113==2.1.21 +strictyaml==1.6.2 +tenacity==8.2.2 +tensorboard==2.11.2 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboardX==2.5.1 +termcolor==2.1.1 +terminado==0.17.1 +threadpoolctl==3.1.0 +tifffile==2021.11.2 +timm==0.6.12 +tinycss2==1.2.1 +torch-scatter==2.1.0 +torchmetrics==0.5.0 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.9.0 +trimesh==3.21.3 +typing_extensions==4.4.0 +urllib3==1.26.15 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.4.2 +Werkzeug==2.2.3 +widgetsnbextension==4.0.5 +yacs==0.1.8 +yarl==1.8.2 +zipp==3.11.0 \ No newline at end of file diff --git a/detectionmetrics/cli/batch.py b/detectionmetrics/cli/batch.py index 0dae173a..a9ebd323 100644 --- a/detectionmetrics/cli/batch.py +++ b/detectionmetrics/cli/batch.py @@ -1,4 +1,4 @@ -from itertools import product +from itertools import product, chain from glob import glob import os @@ -30,9 +30,19 @@ def batch(command, jobs_cfg): for model_cfg in jobs_cfg["model"]: model_path = model_cfg["path"] - model_paths = glob(model_path) if model_cfg["path_is_pattern"] else [model_path] - assert model_paths, f"No files found for pattern {model_cfg['path']}" + is_pattern = model_cfg.get("path_is_pattern", False) + if 
isinstance(model_path, list): + if is_pattern: + model_paths = list(chain.from_iterable(glob(p) for p in model_path)) + else: + model_paths = model_path + else: + model_paths = glob(model_path) if is_pattern else [model_path] + + if not model_paths: + raise FileNotFoundError(f"No files found for path/pattern: {model_path}") + print(f"Found {len(model_paths)} model(s) for pattern: {model_path}") for new_path in model_paths: assert os.path.exists(new_path), f"File or directory {new_path} not found" @@ -41,7 +51,8 @@ def batch(command, jobs_cfg): if os.path.isfile(new_path): new_model_id, _ = os.path.splitext(new_model_id) - new_model_cfg = model_cfg | { + new_model_cfg = { + **model_cfg, "path": new_path, "id": f"{model_cfg['id']}-{new_model_id.replace('-', '_')}", } @@ -102,9 +113,20 @@ def batch(command, jobs_cfg): "model": model_cfg["path"], "model_ontology": model_cfg["ontology"], "model_cfg": model_cfg["cfg"], - # "image_size": model_cfg.get("image_size", None), } ) + + if command == "computational_cost": + if jobs_cfg["input_type"] == "image": + params["image_size"] = model_cfg.get("image_size", [512, 512]) + elif jobs_cfg["input_type"] == "lidar": + params["point_cloud_range"] = model_cfg.get( + "point_cloud_range", [-50, -50, -5, 50, 50, 5] + ) + params["num_points"] = model_cfg.get("num_points", 100000) + else: + raise ValueError(f"Unknown input type: {jobs_cfg['input_type']}") + if has_dataset: dataset_cfg = job_components[1] params.update( diff --git a/detectionmetrics/cli/computational_cost.py b/detectionmetrics/cli/computational_cost.py index 951f8a47..402b24c7 100644 --- a/detectionmetrics/cli/computational_cost.py +++ b/detectionmetrics/cli/computational_cost.py @@ -1,7 +1,6 @@ import click from detectionmetrics import cli -from detectionmetrics.utils.io import read_json @click.command(name="computational_cost", help="Estimate model computational cost") @@ -12,9 +11,7 @@ # model @click.option( "--model_format", - type=click.Choice( - ["torch", "tensorflow", "tensorflow_explicit"], case_sensitive=False - ), + type=click.Choice(["torch", "tensorflow"], case_sensitive=False), show_default=True, default="torch", help="Trained model format", @@ -39,14 +36,35 @@ ) @click.option( "--image_size", - type=(int, int), + nargs=2, + type=int, required=False, - help="Dummy image size used for computational cost estimation", + help="Dummy image size. Should be provided as two integers: width height", +) +@click.option( + "--point_cloud_range", + nargs=6, + type=int, + required=False, + help="Dummy point cloud range (meters). 
Should be provided as six integers: x_min y_min z_min x_max y_max z_max", +) +@click.option( + "--num_points", + type=int, + required=False, + help="Number of points for the dummy point cloud (uniformly sampled)", +) +@click.option( + "--has_intensity", + is_flag=True, + default=False, + help="Whether the dummy point cloud has intensity values", ) # output @click.option( "--out_fname", type=click.Path(writable=True), + required=True, help="CSV file where the computational cost estimation results will be stored", ) def computational_cost( @@ -57,23 +75,46 @@ def computational_cost( model_ontology, model_cfg, image_size, + point_cloud_range, + num_points, + has_intensity, out_fname, ): """Estimate model computational cost""" - - if image_size is None: - parsed_model_cfg = read_json(model_cfg) - if "image_size" in parsed_model_cfg: - image_size = parsed_model_cfg["image_size"] - else: + if input_type == "image": + if image_size is None: + raise ValueError("Image size must be provided for image models") + if point_cloud_range is not None or num_points is not None: + raise ValueError( + "Point cloud range and number of points cannot be provided for image models" + ) + if has_intensity: + raise ValueError("Intensity flag cannot be set for image models") + params = {"image_size": image_size} + elif input_type == "lidar": + if point_cloud_range is None or num_points is None: raise ValueError( - "Image size must be provided either as an argument or in the model configuration file" + "Point cloud range and number of points must be provided for lidar models" ) + if image_size is not None: + raise ValueError("Image size cannot be provided for lidar models") + + params = { + "point_cloud_range": point_cloud_range, + "num_points": num_points, + "has_intensity": has_intensity, + } + else: + raise ValueError(f"Unknown input type: {input_type}") model = cli.get_model( task, input_type, model_format, model, model_ontology, model_cfg ) - results = model.get_computational_cost(image_size) + results = model.get_computational_cost(**params) results.to_csv(out_fname) return results + + +if __name__ == "__main__": + computational_cost() diff --git a/detectionmetrics/cli/evaluate.py b/detectionmetrics/cli/evaluate.py index 4fd23ee0..bba5c0c9 100644 --- a/detectionmetrics/cli/evaluate.py +++ b/detectionmetrics/cli/evaluate.py @@ -25,7 +25,7 @@ def parse_split(ctx, param, value): @click.option( "--model_format", type=click.Choice( - ["torch", "tensorflow", "tensorflow_explicit"], case_sensitive=False + ["torch", "tensorflow"], case_sensitive=False ), show_default=True, default="torch", @@ -197,3 +197,7 @@ def evaluate( results.to_csv(out_fname) return results + + +if __name__ == "__main__": + evaluate() diff --git a/detectionmetrics/datasets/__init__.py b/detectionmetrics/datasets/__init__.py index e494f858..503e3839 100644 --- a/detectionmetrics/datasets/__init__.py +++ b/detectionmetrics/datasets/__init__.py @@ -16,7 +16,11 @@ ) from detectionmetrics.datasets.rugd import RUGDImageSegmentationDataset from detectionmetrics.datasets.wildscenes import WildscenesImageSegmentationDataset -from detectionmetrics.datasets.coco import CocoDataset +try: + from detectionmetrics.datasets.coco import CocoDataset +except ImportError: + print("COCO dataset dependencies not available") + CocoDataset = None REGISTRY = { "gaia_image_segmentation": GaiaImageSegmentationDataset, @@ -29,5 +33,7 @@ "rellis3d_lidar_segmentation": Rellis3DLiDARSegmentationDataset, "rugd_image_segmentation": RUGDImageSegmentationDataset, 
"wildscenes_image_segmentation": WildscenesImageSegmentationDataset, - "coco_image_detection": CocoDataset, } + +if CocoDataset is not None: + REGISTRY["coco_detection"] = CocoDataset \ No newline at end of file diff --git a/detectionmetrics/datasets/gaia.py b/detectionmetrics/datasets/gaia.py index a9ceaaa2..84722038 100644 --- a/detectionmetrics/datasets/gaia.py +++ b/detectionmetrics/datasets/gaia.py @@ -23,8 +23,15 @@ def build_dataset(dataset_fname: str) -> Tuple[pd.DataFrame, str, dict]: dataset_dir = os.path.dirname(dataset_fname) # Read ontology file - ontology_fname = dataset.attrs["ontology_fname"] - ontology = uio.read_json(os.path.join(dataset_dir, ontology_fname)) + try: + ontology_fname = dataset.attrs["ontology_fname"] + except KeyError: + ontology_fname = "ontology.json" + + ontology_fname = os.path.join(dataset_dir, ontology_fname) + assert os.path.isfile(ontology_fname), "Ontology file not found" + + ontology = uio.read_json(ontology_fname) for name, data in ontology.items(): ontology[name]["rgb"] = tuple(data["rgb"]) diff --git a/detectionmetrics/datasets/goose.py b/detectionmetrics/datasets/goose.py index b83f6d9b..34615d60 100644 --- a/detectionmetrics/datasets/goose.py +++ b/detectionmetrics/datasets/goose.py @@ -16,6 +16,7 @@ def build_dataset( train_dataset_dir: Optional[str] = None, val_dataset_dir: Optional[str] = None, test_dataset_dir: Optional[str] = None, + is_goose_ex: bool = False, ) -> Tuple[dict, dict]: """Build dataset and ontology dictionaries from GOOSE dataset structure @@ -31,6 +32,8 @@ def build_dataset( :type val_dataset_dir: str, optional :param test_dataset_dir: Directory containing test data, defaults to None :type test_dataset_dir: str, optional + :param is_goose_ex: Whether the dataset is GOOSE Ex or GOOSE, defaults to False + :type is_goose_ex: bool, optional :return: Dataset and onotology :rtype: Tuple[dict, dict] """ @@ -66,13 +69,23 @@ def build_dataset( train_data = os.path.join(dataset_dir, f"{data_type}/{split}/*/*_{data_suffix}") for data_fname in glob(train_data): sample_dir, sample_base_name = os.path.split(data_fname) - sample_base_name = sample_base_name.split("__")[-1] + + # GOOSE Ex uses a different label file naming convention + if is_goose_ex: + sample_base_name = "sequence" + sample_base_name.split("_sequence")[-1] + else: + sample_base_name = sample_base_name.split("__")[-1] + sample_base_name = sample_base_name.split("_" + data_suffix)[0] scene = os.path.split(sample_dir)[-1] sample_name = f"{scene}-{sample_base_name}" - label_base_name = f"{scene}__{sample_base_name}_{label_suffix}" + if is_goose_ex: + label_base_name = f"{scene}_{sample_base_name}_{label_suffix}" + else: + label_base_name = f"{scene}__{sample_base_name}_{label_suffix}" + label_fname = os.path.join( dataset_dir, "labels", split, scene, label_base_name ) @@ -131,9 +144,9 @@ def __init__( class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDataset): """Specific class for GOOSE-styled LiDAR segmentation datasets. 
All data can be downloaded from the official webpage (https://goose-dataset.de): - train -> https://goose-dataset.de/storage/goose_3d_train.zip - val -> https://goose-dataset.de/storage/goose_3d_val.zip - test -> https://goose-dataset.de/storage/goose_3d_test.zip + train -> https://goose-dataset.de/storage/gooseEx_3d_train.zip + val -> https://goose-dataset.de/storage/gooseEx_3d_val.zip + test -> https://goose-dataset.de/storage/gooseEx_3d_test.zip :param train_dataset_dir: Directory containing training data :type train_dataset_dir: str @@ -141,6 +154,8 @@ class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDat :type val_dataset_dir: str, optional :param test_dataset_dir: Directory containing test data, defaults to None :type test_dataset_dir: str, optional + :param is_goose_ex: Whether the dataset is GOOSE Ex or GOOSE, defaults to False + :type is_goose_ex: bool, optional """ def __init__( @@ -148,14 +163,16 @@ def __init__( train_dataset_dir: Optional[str] = None, val_dataset_dir: Optional[str] = None, test_dataset_dir: Optional[str] = None, + is_goose_ex: bool = False, ): dataset, ontology = build_dataset( "lidar", - "vls128.bin", + "pcl.bin" if is_goose_ex else "vls128.bin", "goose.label", train_dataset_dir, val_dataset_dir, test_dataset_dir, + is_goose_ex=is_goose_ex, ) # Convert to Pandas diff --git a/detectionmetrics/datasets/segmentation.py b/detectionmetrics/datasets/segmentation.py index 2c199aae..7a74ef53 100644 --- a/detectionmetrics/datasets/segmentation.py +++ b/detectionmetrics/datasets/segmentation.py @@ -12,6 +12,7 @@ from detectionmetrics.datasets.perception import PerceptionDataset import detectionmetrics.utils.io as uio import detectionmetrics.utils.conversion as uc +import detectionmetrics.utils.lidar as ul class SegmentationDataset(PerceptionDataset): @@ -68,7 +69,7 @@ def export( outdir: str, new_ontology: Optional[dict] = None, ontology_translation: Optional[dict] = None, - ignored_classes: Optional[List[str]] = None, + classes_to_remove: Optional[List[str]] = None, resize: Optional[Tuple[int, int]] = None, include_label_count: bool = True, ): @@ -80,8 +81,8 @@ def export( :type new_ontology: dict :param ontology_translation: Ontology translation dictionary, defaults to None :type ontology_translation: Optional[dict], optional - :param ignored_classes: Classes to ignore from the old ontology, defaults to [] - :type ignored_classes: Optional[List[str]], optional + :param classes_to_remove: Classes to remove from the old ontology, defaults to [] + :type classes_to_remove: Optional[List[str]], optional :param resize: Resize images and labels to the given dimensions, defaults to None :type resize: Optional[Tuple[int, int]], optional :param include_label_count: Whether to include class weights in the dataset, defaults to True @@ -104,7 +105,8 @@ def export( old_ontology=self.ontology, new_ontology=new_ontology, ontology_translation=ontology_translation, - ignored_classes=ignored_classes, + classes_to_remove=classes_to_remove, + lut_dtype=np.uint32, ) n_classes = max(c["idx"] for c in new_ontology.values()) + 1 else: @@ -166,7 +168,7 @@ def export( # Convert label to new ontology if needed if ontology_conversion_lut is not None: - label = ontology_conversion_lut[label] + label = ontology_conversion_lut[label].astype(np.uint8) # Resize label if needed if resize is not None: @@ -254,6 +256,8 @@ class LiDARSegmentationDataset(SegmentationDataset): :type ontology: dict :param is_kitti_format: Whether the linked files in the dataset are stored in 
SemanticKITTI format or not, defaults to True :type is_kitti_format: bool, optional + :param has_intensity: Whether the point cloud files contain intensity values, defaults to True + :type has_intensity: bool, optional """ def __init__( @@ -262,9 +266,11 @@ def __init__( dataset_dir: str, ontology: dict, is_kitti_format: bool = True, + has_intensity: bool = True, ): super().__init__(dataset, dataset_dir, ontology) self.is_kitti_format = is_kitti_format + self.has_intensity = has_intensity def make_fname_global(self): """Get all relative filenames in dataset and make global""" @@ -282,7 +288,9 @@ def export( outdir: str, new_ontology: Optional[dict] = None, ontology_translation: Optional[dict] = None, - ignored_classes: Optional[List[str]] = [], + classes_to_remove: Optional[List[str]] = [], + include_label_count: bool = True, + remove_origin: bool = False, ): """Export dataset dataframe and LiDAR files in SemanticKITTI format. Optionally, modify ontology before exporting. @@ -292,8 +300,12 @@ def export( :type new_ontology: dict :param ontology_translation: Ontology translation dictionary, defaults to None :type ontology_translation: Optional[dict], optional - :param ignored_classes: Classes to ignore from the old ontology, defaults to [] - :type ignored_classes: Optional[List[str]], optional + :param classes_to_remove: Classes to remove from the old ontology, defaults to [] + :type classes_to_remove: Optional[List[str]], optional + :param include_label_count: Whether to include class weights in the dataset, defaults to True + :type include_label_count: bool, optional + :param remove_origin: Whether to remove the origin from the point cloud (mostly for removing RELLIS-3D spurious points), defaults to False + :type remove_origin: bool, optional """ os.makedirs(outdir, exist_ok=True) @@ -302,14 +314,25 @@ def export( if ontology_translation is not None and new_ontology is None: raise ValueError("New ontology must be provided") + # Create ontology conversion lookup table if needed and get number of classes ontology_conversion_lut = None if new_ontology is not None: ontology_conversion_lut = uc.get_ontology_conversion_lut( old_ontology=self.ontology, new_ontology=new_ontology, ontology_translation=ontology_translation, - ignored_classes=ignored_classes, + classes_to_remove=classes_to_remove, ) + n_classes = max(c["idx"] for c in new_ontology.values()) + 1 + else: + n_classes = max(c["idx"] for c in self.ontology.values()) + 1 + + # Check if label count is missing and create empty array if needed + label_count_missing = include_label_count and ( + not self.has_label_count or new_ontology is not None or remove_origin + ) + if label_count_missing: + label_count = np.zeros(n_classes, dtype=np.uint64) pbar = tqdm(self.dataset.iterrows()) @@ -334,23 +357,51 @@ def export( label_fname = os.path.join(self.dataset_dir, label_fname) # If format is not appropriate: read, convert, and rewrite sample - if not self.is_kitti_format or ontology_conversion_lut is not None: + if ( + not self.is_kitti_format + or ontology_conversion_lut is not None + or label_count_missing + or remove_origin + ): points = self.read_points(points_fname) - label, _ = self.read_label(label_fname) + label = self.read_label(label_fname) + + # Convert label to new ontology if needed if ontology_conversion_lut is not None: - label = ontology_conversion_lut[label] + label = ontology_conversion_lut[label].astype(np.uint32) + + # Remove points in coordinate origin if needed + if remove_origin: + mask = np.all(points[:, :3] != 0, axis=1) + 
points = points[mask] + label = label[mask] + points.tofile(os.path.join(outdir, rel_points_fname)) label.tofile(os.path.join(outdir, rel_label_fname)) + + indices, counts = np.unique(label, return_counts=True) + label_count[indices] += counts.astype(np.uint64) else: - shutil.copy2(points_fname, os.path.join(outdir, rel_points_fname)) - shutil.copy2(label_fname, os.path.join(outdir, rel_label_fname)) + new_points_fname = os.path.join(outdir, rel_points_fname) + new_label_fname = os.path.join(outdir, rel_label_fname) + try: + shutil.copy2(points_fname, new_points_fname) + shutil.copy2(label_fname, new_label_fname) + except shutil.SameFileError: + pass # Source and destination are the same file self.dataset.at[sample_name, "points"] = rel_points_fname self.dataset.at[sample_name, "label"] = rel_label_fname + # Update dataset directory and ontology if needed self.dataset_dir = outdir + self.ontology = new_ontology if new_ontology is not None else self.ontology # Write ontology and store relative path in dataset attributes + if label_count_missing: + for class_data in self.ontology.values(): + class_data["label_count"] = int(label_count[class_data["idx"]]) + ontology_fname = "ontology.json" self.dataset.attrs = {"ontology_fname": ontology_fname} uio.write_json(os.path.join(outdir, ontology_fname), self.ontology) @@ -358,29 +409,23 @@ def export( # Store dataset as Parquet file containing relative filenames self.dataset.to_parquet(os.path.join(outdir, "dataset.parquet")) - @staticmethod - def read_points(fname: str) -> np.ndarray: - """Read points from a binary file in SemanticKITTI format + def read_points(self, fname: str) -> np.ndarray: + """Read point cloud. Defaults to SemanticKITTI format - :param fname: Binary file containing points + :param fname: File containing point cloud :type fname: str :return: Numpy array containing points :rtype: np.ndarray """ - points = np.fromfile(fname, dtype=np.float32) - return points.reshape((-1, 4)) + return ul.read_semantickitti_points(fname, self.has_intensity) - @staticmethod - def read_label(fname: str) -> Tuple[np.ndarray, np.ndarray]: - """Read labels from a binary file in SemanticKITTI format + def read_label(self, fname: str) -> Tuple[np.ndarray, np.ndarray]: + """Read semantic labels. 
Defaults to SemanticKITTI format :param fname: Binary file containing labels :type fname: str - :return: Numpy arrays containing semantic and instance labels - :rtype: Tuple[np.ndarray, np.ndarray] + :return: Numpy arrays containing semantic labels + :rtype: np.ndarray """ - label = np.fromfile(fname, dtype=np.uint32) - label = label.reshape((-1)) - semantic_label = label & 0xFFFF - instance_label = label >> 16 - return semantic_label.astype(np.int32), instance_label.astype(np.int32) + label, _ = ul.read_semantickitti_label(fname) + return label diff --git a/detectionmetrics/datasets/wildscenes.py b/detectionmetrics/datasets/wildscenes.py index a2dce7a3..2994ab5d 100644 --- a/detectionmetrics/datasets/wildscenes.py +++ b/detectionmetrics/datasets/wildscenes.py @@ -8,61 +8,17 @@ from detectionmetrics.datasets import segmentation as dm_segmentation_dataset -# Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils2d.py) -METAINFO = { - "classes": ( - "unlabelled", - "asphalt", - "dirt", - "mud", - "water", - "gravel", - "other-terrain", - "tree-trunk", - "tree-foliage", - "bush", - "fence", - "structure", - "pole", - "vehicle", - "rock", - "log", - "other-object", - "sky", - "grass", - ), - "palette": [ - (0, 0, 0), - (255, 165, 0), - (60, 180, 75), - (255, 225, 25), - (0, 130, 200), - (145, 30, 180), - (70, 240, 240), - (240, 50, 230), - (210, 245, 60), - (230, 25, 75), - (0, 128, 128), - (170, 110, 40), - (255, 250, 200), - (128, 0, 0), - (170, 255, 195), - (128, 128, 0), - (250, 190, 190), - (0, 0, 128), - (128, 128, 128), - ], - "cidx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], -} - - -def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: +def build_dataset( + dataset_dir: str, split_fnames: dict, ontology: dict +) -> Tuple[dict, dict]: """Build dataset and ontology dictionaries from Wildscenes dataset structure :param dataset_dir: Directory where both RGB images and annotations have been extracted to :type dataset_dir: str :param split_fnames: Dictionary that contains the paths where train, val, and test split files (.csv) have been extracted to :type split_dir: str + :param ontology: Ontology definition as found in the official repo + :type ontology: dict :return: Dataset and onotology :rtype: Tuple[dict, dict] """ @@ -75,10 +31,10 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: assert os.path.isfile(split_fname), f"{split_fname} split file not found" # Load and adapt ontology - ontology = {} - ontology_iter = zip(METAINFO["classes"], METAINFO["palette"], METAINFO["cidx"]) + parsed_ontology = {} + ontology_iter = zip(ontology["classes"], ontology["palette"], ontology["cidx"]) for name, color, idx in ontology_iter: - ontology[name] = {"idx": idx, "rgb": color} + parsed_ontology[name] = {"idx": idx, "rgb": color} # Get samples filenames train_split = pd.read_csv(split_fnames["train"]) @@ -92,6 +48,9 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: samples_data = pd.concat([train_split, val_split, test_split]) + if "hist_path" in samples_data.columns: + samples_data = samples_data.drop(columns=["hist_path"]) + # Build dataset as ordered python dictionary dataset = OrderedDict() skipped_samples = [] @@ -120,20 +79,20 @@ def build_dataset(dataset_dir: str, split_fnames: dict) -> Tuple[dict, dict]: for sample_name in skipped_samples: print(f"\n\t{sample_name}") - return dataset, ontology + return dataset, 
parsed_ontology class WildscenesImageSegmentationDataset( dm_segmentation_dataset.ImageSegmentationDataset ): """Specific class for Wildscenes-styled image segmentation datasets. All data can - be downloaded from the official repo (https://github.com/unmannedlab/RELLIS-3D): + be downloaded from the official repo: dataset -> https://data.csiro.au/collection/csiro:61541 split -> https://github.com/csiro-robotics/WildScenes/tree/main/data/splits/opt2d :param dataset_dir: Directory where dataset images and labels are stored (Wildscenes2D) :type dataset_dir: str - :param split_dir: Directory where train, val, and test files (.csv) have been extracted to (data/splits/opt2d from the official repo) + :param split_dir: Directory where train, val, and test files (.csv) have been extracted to :type split_dir: str """ @@ -143,7 +102,54 @@ def __init__(self, dataset_dir: str, split_dir: str): "val": os.path.join(split_dir, "val.csv"), "test": os.path.join(split_dir, "test.csv"), } - dataset, ontology = build_dataset(dataset_dir, split_fnames) + + # Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils2d.py) + METAINFO = { + "classes": ( + "unlabelled", + "asphalt", + "dirt", + "mud", + "water", + "gravel", + "other-terrain", + "tree-trunk", + "tree-foliage", + "bush", + "fence", + "structure", + "pole", + "vehicle", + "rock", + "log", + "other-object", + "sky", + "grass", + ), + "palette": [ + (0, 0, 0), + (255, 165, 0), + (60, 180, 75), + (255, 225, 25), + (0, 130, 200), + (145, 30, 180), + (70, 240, 240), + (240, 50, 230), + (210, 245, 60), + (230, 25, 75), + (0, 128, 128), + (170, 110, 40), + (255, 250, 200), + (128, 0, 0), + (170, 255, 195), + (128, 128, 0), + (250, 190, 190), + (0, 0, 128), + (128, 128, 128), + ], + "cidx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + } + dataset, ontology = build_dataset(dataset_dir, split_fnames, METAINFO) # Convert to Pandas cols = ["image", "label", "scene", "split"] @@ -151,3 +157,74 @@ def __init__(self, dataset_dir: str, split_dir: str): dataset.attrs = {"ontology": ontology} super().__init__(dataset, dataset_dir, ontology) + + +class WildscenesLiDARSegmentationDataset( + dm_segmentation_dataset.LiDARSegmentationDataset +): + """Specific class for Wildscenes-styled LiDAR segmentation datasets. 
All data can + be downloaded from the official repo: + dataset -> https://data.csiro.au/collection/csiro:61541 + split -> https://github.com/csiro-robotics/WildScenes/tree/main/data/splits/opt3d + + :param dataset_dir: Directory where dataset images and labels are stored (Wildscenes3D) + :type dataset_dir: str + :param split_dir: Directory where train, val, and test files (.csv) have been extracted to + :type split_dir: str + """ + + def __init__(self, dataset_dir: str, split_dir: str): + split_fnames = { + "train": os.path.join(split_dir, "train.csv"), + "val": os.path.join(split_dir, "val.csv"), + "test": os.path.join(split_dir, "test.csv"), + } + + # Ontology definition as found in the official repo (https://github.com/csiro-robotics/WildScenes/blob/main/wildscenes/tools/utils3d.py) + METAINFO = { + "classes": ( + "unlabelled", + "bush", + "dirt", + "fence", + "grass", + "gravel", + "log", + "mud", + "other-object", + "other-terrain", + "rock", + "sky", + "structure", + "tree-foliage", + "tree-trunk", + "water", + ), + "palette": [ + (0, 0, 0), + (230, 25, 75), + (60, 180, 75), + (0, 128, 128), + (128, 128, 128), + (145, 30, 180), + (128, 128, 0), + (255, 225, 25), + (250, 190, 190), + (70, 240, 240), + (170, 255, 195), + (0, 0, 128), + (170, 110, 40), + (210, 245, 60), + (240, 50, 230), + (0, 130, 200), + ], + "cidx": [255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + } + dataset, ontology = build_dataset(dataset_dir, split_fnames, METAINFO) + + # Convert to Pandas + cols = ["points", "label", "scene", "split"] + dataset = pd.DataFrame.from_dict(dataset, orient="index", columns=cols) + dataset.attrs = {"ontology": ontology} + + super().__init__(dataset, dataset_dir, ontology, has_intensity=False) diff --git a/detectionmetrics/models/__init__.py b/detectionmetrics/models/__init__.py index 38b2f620..e8e4aa54 100644 --- a/detectionmetrics/models/__init__.py +++ b/detectionmetrics/models/__init__.py @@ -19,7 +19,7 @@ print("Torch detection not available") try: - from detectionmetrics.models.tensorflow import TensorflowImageSegmentationModel + from detectionmetrics.models.tf_segmentation import TensorflowImageSegmentationModel REGISTRY["tensorflow_image_segmentation"] = TensorflowImageSegmentationModel except ImportError: diff --git a/detectionmetrics/models/perception.py b/detectionmetrics/models/perception.py index f78b1472..e6ece062 100644 --- a/detectionmetrics/models/perception.py +++ b/detectionmetrics/models/perception.py @@ -47,6 +47,7 @@ def __init__( self.ontology = uio.read_json(ontology_fname) self.model_cfg = uio.read_json(model_cfg) self.n_classes = len(self.ontology) + self.model_cfg["n_classes"] = self.n_classes @abstractmethod def inference( @@ -90,6 +91,6 @@ def get_lut_ontology( dataset_ontology, self.ontology, ontology_translation, - self.model_cfg.get("ignored_classes", []), + classes_to_remove=self.model_cfg.get("classes_to_remove", None), ) return lut_ontology diff --git a/detectionmetrics/models/segmentation.py b/detectionmetrics/models/segmentation.py index 66a4b141..10be5aeb 100644 --- a/detectionmetrics/models/segmentation.py +++ b/detectionmetrics/models/segmentation.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod import os -from typing import Any, List, Optional, Union +from typing import Any, List, Optional, Tuple, Union import numpy as np import pandas as pd @@ -39,24 +39,36 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) @abstractmethod - def inference( - self, points: Union[np.ndarray, Image.Image] 
+ def predict( + self, data: Union[np.ndarray, Image.Image] ) -> Union[np.ndarray, Image.Image]: - """Perform inference for a single image or point cloud + """Perform prediction for a single data sample - :param image: Either a numpy array (LiDAR point cloud) or a PIL image - :type image: Union[np.ndarray, Image.Image] - :return: Segmenation result as a point cloud or image with label indices + :param data: Input data sample (image or point cloud) + :type data: Union[np.ndarray, Image.Image] + :return: Prediction result :rtype: Union[np.ndarray, Image.Image] """ raise NotImplementedError + @abstractmethod + def inference(self, tensor_in): + """Perform inference for a tensor + + :param tensor_in: Input tensor (image or point cloud) + :type tensor_in: Either tf.Tensor or torch.Tensor + :return: Segmenation result as a tensor + :rtype: Either tf.Tensor or torch.Tensor + """ + raise NotImplementedError + @abstractmethod def eval( self, dataset: dm_segentation_dataset.SegmentationDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, + translations_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -65,9 +77,11 @@ def eval( :param dataset: Segmentation dataset for which the evaluation will be performed :type dataset: ImageSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional + :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model" + :type translations_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. :type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. @@ -104,13 +118,17 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) @abstractmethod - def inference(self, image: Image.Image) -> Image.Image: - """Perform inference for a single image + def predict( + self, image: Image.Image, return_sample: bool = False + ) -> Union[Image.Image, Tuple[Image.Image, Any]]: + """Perform prediction for a single image - :param image: PIL image. 
+ :param image: PIL image :type image: Image.Image - :return: Segmenation result as PIL image - :rtype: Image.Image + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor + :rtype: Union[Image.Image, Tuple[Image.Image, Any]] """ raise NotImplementedError @@ -118,8 +136,9 @@ def inference(self, image: Image.Image) -> Image.Image: def eval( self, dataset: dm_segentation_dataset.ImageSegmentationDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, + translations_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -128,9 +147,11 @@ def eval( :param dataset: Image segmentation dataset for which the evaluation will be performed :type dataset: ImageSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional + :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model" + :type translations_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. :type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. 
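For context while reviewing this hunk, here is a short usage sketch (not part of the patch) of the renamed `predict()`/`inference()` pair and the new `translations_direction` argument on `ImageSegmentationModel`. Only the signatures shown in this diff are relied upon; the `model`, `dataset`, and file-name parameters are assumed to be supplied by the caller (e.g. a model built from `detectionmetrics.models.REGISTRY`) rather than taken from this diff.

```python
from PIL import Image


def run_image_example(model, dataset, image_fname, translation_fname=None):
    """Sketch of the updated ImageSegmentationModel API introduced in this patch.

    Assumptions: `model` is an already-built ImageSegmentationModel subclass and
    `dataset` is an ImageSegmentationDataset; file names are caller-supplied.
    """
    # predict() replaces the old per-image inference(); with return_sample=True it
    # also hands back the preprocessed tensor that was fed to the network.
    prediction, sample = model.predict(Image.open(image_fname), return_sample=True)

    # inference() now operates directly on a framework tensor (useful for benchmarking).
    logits = model.inference(sample)

    # eval() gains translations_direction, which controls how the ontology
    # translation file is applied ("dataset_to_model" is the default).
    results = model.eval(
        dataset,
        split="val",
        ontology_translation=translation_fname,
        translations_direction="dataset_to_model",
    )
    return prediction, logits, results
```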
@@ -140,6 +161,25 @@ def eval( """ raise NotImplementedError + @abstractmethod + def get_computational_cost( + self, + image_size: Tuple[int] = None, + runs: int = 30, + warm_up_runs: int = 5, + ) -> dict: + """Get different metrics related to the computational cost of the model + + :param image_size: Image size used for inference + :type image_size: Tuple[int], optional + :param runs: Number of runs to measure inference time, defaults to 30 + :type runs: int, optional + :param warm_up_runs: Number of warm-up runs, defaults to 5 + :type warm_up_runs: int, optional + :return: Dictionary containing computational cost information + """ + raise NotImplementedError + class LiDARSegmentationModel(SegmentationModel): """Parent LiDAR segmentation model class @@ -167,13 +207,22 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) @abstractmethod - def inference(self, points: np.ndarray) -> np.ndarray: - """Perform inference for a single image + def predict( + self, + points_fname: str, + has_intensity: bool = True, + return_sample: bool = False, + ) -> Union[np.ndarray, Tuple[np.ndarray, Any]]: + """Perform prediction for a single point cloud - :param image: Point cloud xyz array - :type image: np.ndarray - :return: Segmenation result as a point cloud with label indices - :rtype: np.ndarray + :param points_fname: Point cloud in SemanticKITTI .bin format + :type points_fname: str + :param has_intensity: Whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :return: Segmentation result as a numpy array or a tuple with the segmentation result and the input sample data + :rtype: Union[np.ndarray, Tuple[np.ndarray, Any]] """ raise NotImplementedError @@ -181,8 +230,9 @@ def inference(self, points: np.ndarray) -> np.ndarray: def eval( self, dataset: dm_segentation_dataset.LiDARSegmentationDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, + translations_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -191,9 +241,11 @@ def eval( :param dataset: LiDAR segmentation dataset for which the evaluation will be performed :type dataset: LiDARSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional + :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model" + :type translations_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. :type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. 
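Similarly, a hedged sketch (not part of the patch) of the LiDAR-side API touched by the previous hunk; `model`, `dataset`, and `points_fname` are assumed to be caller-supplied, and only the signatures shown in this diff are relied upon.

```python
def run_lidar_example(model, dataset, points_fname, has_intensity=True):
    """Sketch of the updated LiDARSegmentationModel API introduced in this patch.

    Assumptions: `model` is an already-built LiDARSegmentationModel subclass and
    `dataset` is a LiDARSegmentationDataset; `points_fname` is a SemanticKITTI-style
    .bin point cloud supplied by the caller.
    """
    # predict() now takes the point cloud file name directly and can flag
    # intensity-less clouds (the WildScenes 3D dataset class, for example,
    # is registered with has_intensity=False).
    labels = model.predict(points_fname, has_intensity=has_intensity)

    # The abstract LiDAR get_computational_cost() exposes timing controls only;
    # the CLI additionally forwards point_cloud_range, num_points, and
    # has_intensity to concrete backends.
    cost = model.get_computational_cost(runs=30, warm_up_runs=5)

    results = model.eval(dataset, split="test")
    return labels, cost, results
```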
@@ -202,3 +254,15 @@ def eval( :rtype: pd.DataFrame """ raise NotImplementedError + + @abstractmethod + def get_computational_cost(self, runs: int = 30, warm_up_runs: int = 5) -> dict: + """Get different metrics related to the computational cost of the model + + :param runs: Number of runs to measure inference time, defaults to 30 + :type runs: int, optional + :param warm_up_runs: Number of warm-up runs, defaults to 5 + :type warm_up_runs: int, optional + :return: Dictionary containing computational cost information + """ + raise NotImplementedError diff --git a/detectionmetrics/models/tensorflow.py b/detectionmetrics/models/tf_segmentation.py similarity index 78% rename from detectionmetrics/models/tensorflow.py rename to detectionmetrics/models/tf_segmentation.py index a1e25012..38b33130 100644 --- a/detectionmetrics/models/tensorflow.py +++ b/detectionmetrics/models/tf_segmentation.py @@ -13,74 +13,12 @@ from detectionmetrics.datasets.segmentation import ImageSegmentationDataset from detectionmetrics.models.segmentation import ImageSegmentationModel +import detectionmetrics.utils.conversion as uc +import detectionmetrics.utils.io as uio import detectionmetrics.utils.segmentation_metrics as um -tf.config.optimizer.set_experimental_options({"layout_optimizer": False}) - - -def get_computational_cost( - model: tf.Module, - dummy_input: tf.Tensor, - model_fname: Optional[str] = None, - runs: int = 30, - warm_up_runs: int = 5, -) -> dict: - """Get different metrics related to the computational cost of the model - - :param model: Loaded TensorFlow SavedModel - :type model: tf.Module - :param dummy_input: Dummy input data for the model - :type dummy_input: tf.Tensor - :param model_fname: Model filename used to measure model size, defaults to None - :type model_fname: Optional[str], optional - :param runs: Number of runs to measure inference time, defaults to 30 - :type runs: int, optional - :param warm_up_runs: Number of warm-up runs, defaults to 5 - :type warm_up_runs: int, optional - :return: DataFrame containing computational cost information - :rtype: pd.DataFrame - """ - # Get model size (if possible) and number of parameters - if model_fname is not None: - size_mb = sum( - os.path.getsize(os.path.join(dirpath, f)) - for dirpath, _, files in os.walk(model_fname) - for f in files - ) - size_mb /= 1024**2 - else: - size_mb = None - - n_params = sum(np.prod(var.shape) for var in model.variables.variables) - - # Measure inference time with GPU synchronization - infer = model.signatures["serving_default"] - for _ in range(warm_up_runs): - _ = infer(dummy_input) - - has_gpu = bool(tf.config.list_physical_devices("GPU")) - inference_times = [] - - for _ in range(runs): - if has_gpu: - tf.config.experimental.set_synchronous_execution(True) - - start_time = time.time() - _ = infer(dummy_input) - if has_gpu: - tf.config.experimental.set_synchronous_execution(True) - - inference_times.append(time.time() - start_time) - - # Retrieve computational cost information - result = { - "input_shape": ["x".join(map(str, dummy_input.shape.as_list()))], - "n_params": [int(n_params)], - "size_mb": [size_mb], - "inference_time_s": [np.mean(inference_times)], - } - return pd.DataFrame.from_dict(result) +tf.config.optimizer.set_experimental_options({"layout_optimizer": False}) def resize_image( @@ -361,33 +299,53 @@ def t_in(image): tf.argmax(tf.squeeze(x), axis=2).numpy().astype(np.uint8) ) - def inference(self, image: Image.Image) -> Image.Image: - """Perform inference for a single image + def predict( + self, 
image: Image.Image, return_sample: bool = False + ) -> Union[Image.Image, Tuple[Image.Image, tf.Tensor]]: + """Perform prediction for a single image :param image: PIL image :type image: Image.Image - :return: segmenation result as PIL image - :rtype: Image.Image + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor + :rtype: Union[Image.Image, Tuple[Image.Image, tf.Tensor]] """ - tensor = self.t_in(image) + sample = self.t_in(image) + result = self.inference(sample) + result = self.t_out(result) + + if return_sample: + return result, sample + else: + return result + def inference(self, tensor_in: tf.Tensor) -> tf.Tensor: + """Perform inference for a tensor + + :param tensor_in: Input point cloud tensor + :type tensor_in: tf.Tensor + :return: Segmentation result as tensor + :rtype: tf.Tensor + """ if self.model_type == "native": - result = self.model(tensor) + tensor_out = self.model(tensor_in, training=False) elif self.model_type == "compiled": - result = self.model.signatures["serving_default"](tensor) + tensor_out = self.model.signatures["serving_default"](tensor_in) else: raise ValueError("Model type not recognized") - if isinstance(result, dict): - result = list(result.values())[0] + if isinstance(tensor_out, dict): + tensor_out = list(tensor_out.values())[0] - return self.t_out(result) + return tensor_out def eval( self, dataset: ImageSegmentationDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, + translations_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -396,9 +354,11 @@ def eval( :param dataset: Image segmentation dataset for which the evaluation will be performed :type dataset: ImageSegmentationDataset :param split: Split to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional + :param translations_direction: Direction of the ontology translation. Either "dataset_to_model" or "model_to_dataset", defaults to "dataset_to_model" + :type translations_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. :type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. 
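
Editor's note — the hunk that follows builds a look-up table (LUT) with `uc.get_ontology_conversion_lut` and applies it either to the ground-truth labels (`dataset_to_model`) or to the predictions (`model_to_dataset`). A toy NumPy illustration of the mechanism, with made-up class ids (the actual code below operates on TF tensors and the dataset pipeline):

    import numpy as np

    # Hypothetical mapping: 3-class model ontology -> 2-class dataset ontology
    lut_model_to_dataset = np.array([0, 1, 1])  # model class 2 folds into dataset class 1

    pred_model_space = np.array([[0, 2],
                                 [1, 2]])
    pred_dataset_space = lut_model_to_dataset[pred_model_space]  # [[0, 1], [1, 1]]

    # With translations_direction="model_to_dataset" the predictions are remapped
    # like this and metrics are computed in the dataset ontology; with the default
    # "dataset_to_model" the labels are remapped instead and metrics stay in the
    # model ontology.
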
@@ -417,8 +377,23 @@ def eval( os.makedirs(predictions_outdir, exist_ok=True) # Build a LUT for transforming ontology if needed - lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation) - dataset_ontology = dataset.ontology + eval_ontology = self.ontology + + if ontology_translation is not None: + ontology_translation = uio.read_json(ontology_translation) + if translations_direction == "dataset_to_model": + lut_ontology = uc.get_ontology_conversion_lut( + dataset.ontology, self.ontology, ontology_translation + ) + else: + eval_ontology = dataset.ontology + lut_ontology = uc.get_ontology_conversion_lut( + self.ontology, dataset.ontology, ontology_translation + ) + else: + lut_ontology = None + + n_classes = len(eval_ontology) # Get Tensorflow dataset dataset = ImageSegmentationTensorflowDataset( @@ -427,7 +402,9 @@ def eval( crop=self.model_cfg.get("crop", None), batch_size=self.model_cfg.get("batch_size", 1), splits=[split] if isinstance(split, str) else split, - lut_ontology=lut_ontology, + lut_ontology=( + lut_ontology if translations_direction == "dataset_to_model" else None + ), normalization=self.model_cfg.get("normalization", None), keep_aspect=self.model_cfg.get("keep_aspect", False), ) @@ -435,25 +412,17 @@ def eval( # Retrieve ignored label indices ignored_label_indices = [] for ignored_class in self.model_cfg.get("ignored_classes", []): - ignored_label_indices.append(dataset_ontology[ignored_class]["idx"]) + ignored_label_indices.append(eval_ontology[ignored_class]["idx"]) # Init metrics - metrics_factory = um.SegmentationMetricsFactory(self.n_classes) + metrics_factory = um.SegmentationMetricsFactory(n_classes) # Evaluation loop pbar = tqdm(dataset.dataset) for idx, image, label in pbar: idx = idx.numpy() - if self.model_type == "native": - pred = self.model(image, training=False) - elif self.model_type == "compiled": - pred = self.model.signatures["serving_default"](image) - else: - raise ValueError("Model type not recognized") - - if isinstance(pred, dict): - pred = list(pred.values())[0] + pred = self.inference(image) # Get valid points masks depending on ignored label indices if ignored_label_indices: @@ -469,6 +438,13 @@ def eval( if valid_mask is not None: valid_mask = tf.squeeze(valid_mask, axis=3).numpy() + # Convert predictions to dataset ontology if needed + if ( + lut_ontology is not None + and translations_direction == "model_to_dataset" + ): + pred = lut_ontology[pred] + metrics_factory.update(pred, label, valid_mask) # Store predictions and results per sample if required @@ -481,16 +457,16 @@ def eval( sample_valid_mask = ( valid_mask[i] if valid_mask is not None else None ) - sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes) + sample_mf = um.SegmentationMetricsFactory(n_classes) sample_mf.update(sample_pred, sample_label, sample_valid_mask) - sample_df = um.get_metrics_dataframe(sample_mf, self.ontology) + sample_df = um.get_metrics_dataframe(sample_mf, eval_ontology) sample_df.to_csv( os.path.join(predictions_outdir, f"{sample_idx}.csv") ) pred = Image.fromarray(np.squeeze(pred).astype(np.uint8)) pred.save(os.path.join(predictions_outdir, f"{sample_idx}.png")) - return um.get_metrics_dataframe(metrics_factory, self.ontology) + return um.get_metrics_dataframe(metrics_factory, eval_ontology) def get_computational_cost( self, @@ -508,7 +484,46 @@ def get_computational_cost( :type warm_up_runs: int, optional :return: Dictionary containing computational cost information """ + # Generate dummy input dummy_input = 
tf.random.normal([1, *image_size, 3]) - return get_computational_cost( - self.model, dummy_input, self.model_fname, runs, warm_up_runs - ) + + # Get model size (if possible) and number of parameters + if self.model_fname is not None: + size_mb = sum( + os.path.getsize(os.path.join(dirpath, f)) + for dirpath, _, files in os.walk(self.model_fname) + for f in files + ) + size_mb /= 1024**2 + else: + size_mb = None + + n_params = sum(np.prod(var.shape) for var in self.model.variables.variables) + + # Measure inference time with GPU synchronization + for _ in range(warm_up_runs): + self.inference(dummy_input) + + has_gpu = bool(tf.config.list_physical_devices("GPU")) + inference_times = [] + + for _ in range(runs): + if has_gpu: + tf.config.experimental.set_synchronous_execution(True) + + start_time = time.time() + self.inference(dummy_input) + + if has_gpu: + tf.config.experimental.set_synchronous_execution(True) + + inference_times.append(time.time() - start_time) + + # Retrieve computational cost information + result = { + "input_shape": ["x".join(map(str, dummy_input.shape.as_list()))], + "n_params": [int(n_params)], + "size_mb": [size_mb], + "inference_time_s": [np.mean(inference_times)], + } + return pd.DataFrame.from_dict(result) diff --git a/detectionmetrics/models/torch_detection.py b/detectionmetrics/models/torch_detection.py index 5685ee7f..56aa9a09 100644 --- a/detectionmetrics/models/torch_detection.py +++ b/detectionmetrics/models/torch_detection.py @@ -268,7 +268,7 @@ def __init__( # Default to 640x640 when no resize is specified resize_height = 640 resize_width = 640 - + self.transform_input += [ transforms.Resize( size=(resize_height, resize_width), @@ -325,7 +325,7 @@ def inference(self, image: Image.Image) -> Dict[str, torch.Tensor]: def eval( self, dataset: dm_detection_dataset.ImageDetectionDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, predictions_outdir: Optional[str] = None, results_per_sample: bool = False, @@ -337,7 +337,7 @@ def eval( :param dataset: Image detection dataset :type dataset: ImageDetectionDataset :param split: Dataset split(s) to evaluate - :type split: str | List[str] + :type split: Union[str, List[str]] :param ontology_translation: Optional translation for class mapping :type ontology_translation: Optional[str] :param predictions_outdir: Directory to save predictions, if desired diff --git a/detectionmetrics/models/torch_model_utils/__init__.py b/detectionmetrics/models/torch_model_utils/__init__.py deleted file mode 100644 index 48f449a4..00000000 --- a/detectionmetrics/models/torch_model_utils/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Optional, Tuple - -import numpy as np - -try: - from open3d._ml3d.datasets.utils import DataProcessing -except Exception: - print("Open3D-ML3D not available") -from sklearn.neighbors import KDTree - -from detectionmetrics.models.torch_model_utils import o3d_randlanet, o3d_kpconv - - -# Default functions -def preprocess( - points: np.ndarray, cfg: Optional[dict] = {} -) -> Tuple[np.ndarray, KDTree, np.ndarray]: - """Preprocess point cloud data - - :param points: Point cloud data - :type points: np.ndarray - :param cfg: Dictionary containing model configuration, defaults to {} - :type cfg: Optional[dict], optional - :return: Subsampled points, search tree, and projected indices - :rtype: Tuple[np.ndarray, KDTree, np.ndarray] - """ - # Keep only XYZ coordinates - points = np.array(points[:, 0:3], dtype=np.float32) - - # 
Subsample points using a grid of given size - grid_size = cfg.get("grid_size", 0.06) - sub_points = DataProcessing.grid_subsampling(points, grid_size=grid_size) - - # Create search tree so that we can project points back to the original point cloud - search_tree = KDTree(sub_points) - projected_indices = np.squeeze(search_tree.query(points, return_distance=False)) - projected_indices = projected_indices.astype(np.int32) - - return sub_points, search_tree, projected_indices - - -transform_input = o3d_randlanet.transform_input -update_probs = o3d_randlanet.update_probs diff --git a/detectionmetrics/models/torch_segmentation.py b/detectionmetrics/models/torch_segmentation.py index bf628a5e..fc76c3a1 100644 --- a/detectionmetrics/models/torch_segmentation.py +++ b/detectionmetrics/models/torch_segmentation.py @@ -1,6 +1,7 @@ -from collections import defaultdict +import importlib import os import time +import tempfile from typing import Any, List, Optional, Tuple, Union import numpy as np @@ -8,77 +9,41 @@ from PIL import Image import torch from torch.utils.data import DataLoader, Dataset -from torchvision.transforms import v2 as transforms -from torchvision.transforms.v2 import functional as F + +try: + from torchvision.transforms import v2 as transforms + from torchvision.transforms.v2 import functional as F +except ImportError: + from torchvision.transforms import transforms + from torchvision.transforms import functional as F from tqdm import tqdm from detectionmetrics.datasets import segmentation as dm_segmentation_dataset from detectionmetrics.models import segmentation as dm_segmentation_model -from detectionmetrics.models import torch_model_utils as tmu -import detectionmetrics.utils.lidar as ul +import detectionmetrics.utils.conversion as uc +import detectionmetrics.utils.io as uio import detectionmetrics.utils.segmentation_metrics as um +import detectionmetrics.utils.torch as ut -def data_to_device( - data: Union[tuple, list], device: torch.device -) -> Union[tuple, list]: - """Move provided data to given device (CPU or GPU) - - :param data: Data provided (it can be a single or multiple tensors) - :type data: Union[tuple, list] - :param device: Device to move data to - :type device: torch.device - :return: Data moved to device - :rtype: Union[tuple, list] - """ - if isinstance(data, (tuple, list)): - return type(data)( - d.to(device) if torch.is_tensor(d) else data_to_device(d, device) - for d in data - ) - elif torch.is_tensor(data): - return data.to(device) - else: - return data - - -def get_data_shape(data: Union[tuple, list]) -> Union[tuple, list]: - """Get the shape of the provided data - - :param data: Data provided (it can be a single or multiple tensors) - :type data: Union[tuple, list] - :return: Data shape - :rtype: Union[tuple, list] - """ - if isinstance(data, (tuple, list)): - return type(data)( - tuple(d.shape) if torch.is_tensor(d) else get_data_shape(d) for d in data - ) - elif torch.is_tensor(data): - return tuple(data.shape) - else: - return tuple(data.shape) +AVAILABLE_MODEL_FORMATS_LIDAR = [ + "o3d_randlanet", + "o3d_kpconv", + "mmdet3d", + "sphereformer", + "lsk3dnet", +] -def unsqueeze_data(data: Union[tuple, list], dim: int = 0) -> Union[tuple, list]: - """Unsqueeze provided data along given dimension +def raise_unknown_model_format_lidar(model_format: str) -> None: + """Raise an exception if the LiDAR model format is unknown - :param data: Data provided (it can be a single or multiple tensors) - :type data: Union[tuple, list] - :param dim: Dimension that will 
be unsqueezed, defaults to 0 - :type dim: int, optional - :return: Unsqueezed data - :rtype: Union[tuple, list] + :param input_format: Model format string + :type input_format: str """ - if isinstance(data, (tuple, list)): - return type(data)( - d.unsqueeze(dim) if torch.is_tensor(d) else unsqueeze_data(d, dim) - for d in data - ) - elif torch.is_tensor(data): - return data.unsqueeze(dim) - else: - return data + msg = f"Unknown model format: {model_format}." + msg += f"Available formats: {AVAILABLE_MODEL_FORMATS_LIDAR}" + raise Exception(msg) def get_computational_cost( @@ -103,63 +68,6 @@ def get_computational_cost( :return: DataFrame containing computational cost information :rtype: pd.DataFrame """ - # Get model size if possible - if model_fname is not None: - size_mb = os.path.getsize(model_fname) / 1024**2 - else: - size_mb = None - - # Measure inference time with GPU synchronization - dummy_tuple = dummy_input if isinstance(dummy_input, tuple) else (dummy_input,) - - for _ in range(warm_up_runs): - if hasattr(model, "inference"): # e.g. mmsegmentation models - model.inference( - *dummy_tuple, - [ - dict( - ori_shape=dummy_tuple[0].shape[2:], - img_shape=dummy_tuple[0].shape[2:], - pad_shape=dummy_tuple[0].shape[2:], - padding_size=[0, 0, 0, 0], - ) - ] - * dummy_tuple[0].shape[0], - ) - else: - model(*dummy_tuple) - - inference_times = [] - for _ in range(runs): - torch.cuda.synchronize() - start_time = time.time() - if hasattr(model, "inference"): # e.g. mmsegmentation models - model.inference( - *dummy_tuple, - [ - dict( - ori_shape=dummy_tuple[0].shape[2:], - img_shape=dummy_tuple[0].shape[2:], - pad_shape=dummy_tuple[0].shape[2:], - padding_size=[0, 0, 0, 0], - ) - ] - * dummy_tuple[0].shape[0], - ) - else: - model(*dummy_tuple) - torch.cuda.synchronize() - end_time = time.time() - inference_times.append(end_time - start_time) - - result = { - "input_shape": ["x".join(map(str, get_data_shape(dummy_input)))], - "n_params": [sum(p.numel() for p in model.parameters())], - "size_mb": [size_mb], - "inference_time_s": [np.mean(inference_times)], - } - - return pd.DataFrame.from_dict(result) class CustomResize(torch.nn.Module): @@ -256,16 +164,14 @@ def __getitem__( class LiDARSegmentationTorchDataset(Dataset): - """Dataset for LiDAR segmentation PyTorch models + """Dataset for LiDAR segmentation PyTorch - Open3D-ML models :param dataset: LiDAR segmentation dataset :type dataset: LiDARSegmentationDataset :param model_cfg: Dictionary containing model configuration :type model_cfg: dict - :param preprocess: Function for preprocessing point clouds - :type preprocess: callable - :param n_classes: Number of classes estimated by the model - :type n_classes: int + :param get_sample: Function for loading sample data + :type get_sample: callable :param splits: Splits to be used from the dataset, defaults to ["test"] :type splits: str, optional """ @@ -274,59 +180,33 @@ def __init__( self, dataset: dm_segmentation_dataset.LiDARSegmentationDataset, model_cfg: dict, - preprocess: callable, - n_classes: int, + get_sample: callable, splits: str = ["test"], ): # Filter split and make filenames global dataset.dataset = dataset.dataset[dataset.dataset["split"].isin(splits)] self.dataset = dataset self.dataset.make_fname_global() - self.model_cfg = model_cfg - self.preprocess = preprocess - self.n_classes = n_classes + self.get_sample = get_sample def __len__(self): return len(self.dataset.dataset) - def __getitem__( - self, idx: int - ) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, 
torch.Tensor]]: - """Prepare sample data: point cloud and label + def __getitem__(self, idx: int): + """Prepare sample data :param idx: Sample index :type idx: int - :return: Point cloud and corresponding label tensor or numpy arrays - :rtype: Tuple[np.ndarray, np.ndarray,] + :return: Sample data required by the model """ - # Read the point cloud and its labels - points = self.dataset.read_points(self.dataset.dataset.iloc[idx]["points"]) - semantic_label, instance_label = self.dataset.read_label( - self.dataset.dataset.iloc[idx]["label"] - ) - - # Preprocess point cloud - preprocessed_points, search_tree, projected_indices = self.preprocess( - points, self.model_cfg - ) - - # Init sampler - sampler = None - if "sampler" in self.model_cfg: - sampler = ul.Sampler( - preprocessed_points.shape[0], - search_tree, - self.model_cfg["sampler"], - self.n_classes, - ) - - return ( - self.dataset.dataset.index[idx], - preprocessed_points, - projected_indices, - (semantic_label, instance_label), - sampler, + return self.get_sample( + points_fname=self.dataset.dataset.iloc[idx]["points"], + model_cfg=self.model_cfg, + label_fname=self.dataset.dataset.iloc[idx]["label"], + name=self.dataset.dataset.index[idx], + idx=idx, + has_intensity=self.dataset.has_intensity, ) @@ -440,43 +320,62 @@ def __init__( ] ) - def inference(self, image: Image.Image) -> Image.Image: - """Perform inference for a single image + def predict( + self, image: Image.Image, return_sample: bool = False + ) -> Union[Image.Image, Tuple[Image.Image, torch.Tensor]]: + """Perform prediction for a single image :param image: PIL image :type image: Image.Image - :return: segmenation result as PIL image - :rtype: Image.Image + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :return: Segmentation result as a PIL image or a tuple with the segmentation result and the input sample tensor + :rtype: Union[Image.Image, Tuple[Image.Image, torch.Tensor]] """ - tensor = self.transform_input(image).unsqueeze(0).to(self.device) + sample = self.transform_input(image).unsqueeze(0).to(self.device) + result = self.inference(sample) + result = self.transform_output(result) + + if return_sample: + return result, sample + else: + return result + def inference(self, tensor_in: torch.Tensor) -> torch.Tensor: + """Perform inference for a tensor + + :param tensor_in: Input point cloud tensor + :type tensor_in: torch.Tensor + :return: Segmentation result as tensor + :rtype: torch.Tensor + """ with torch.no_grad(): # Perform inference if hasattr(self.model, "inference"): # e.g. 
mmsegmentation models - result = self.model.inference( - tensor.to(self.device), + tensor_out = self.model.inference( + tensor_in.to(self.device), [ dict( - ori_shape=tensor.shape[2:], - img_shape=tensor.shape[2:], - pad_shape=tensor.shape[2:], + ori_shape=tensor_in.shape[2:], + img_shape=tensor_in.shape[2:], + pad_shape=tensor_in.shape[2:], padding_size=[0, 0, 0, 0], ) ] - * tensor.shape[0], + * tensor_in.shape[0], ) else: - result = self.model(tensor.to(self.device)) + tensor_out = self.model(tensor_in.to(self.device)) - if isinstance(result, dict): - result = result["out"] + if isinstance(tensor_out, dict): + tensor_out = tensor_out["out"] - return self.transform_output(result) + return tensor_out def eval( self, dataset: dm_segmentation_dataset.ImageSegmentationDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, predictions_outdir: Optional[str] = None, results_per_sample: bool = False, @@ -486,7 +385,7 @@ def eval( :param dataset: Image segmentation dataset for which the evaluation will be performed :type dataset: ImageSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies :type ontology_translation: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. @@ -507,7 +406,9 @@ def eval( os.makedirs(predictions_outdir, exist_ok=True) # Build a LUT for transforming ontology if needed - lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation) + lut_ontology = uc.get_ontology_conversion_lut( + self.ontology, dataset.ontology, ontology_translation + ) lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device) # Retrieve ignored label indices @@ -537,24 +438,7 @@ def eval( pbar = tqdm(dataloader, leave=True) for idx, image, label in pbar: # Perform inference - if hasattr(self.model, "inference"): # e.g. 
mmsegmentation models - pred = self.model.inference( - image.to(self.device), - [ - dict( - ori_shape=image.shape[2:], - img_shape=image.shape[2:], - pad_shape=image.shape[2:], - padding_size=[0, 0, 0, 0], - ) - ] - * image.shape[0], - ) - else: - pred = self.model(image.to(self.device)) - - if isinstance(pred, dict): - pred = pred["out"] + pred = self.inference(image) # Get valid points masks depending on ignored label indices if ignored_label_indices: @@ -585,7 +469,9 @@ def eval( sample_valid_mask = ( valid_mask[i] if valid_mask is not None else None ) - sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes) + sample_mf = um.SegmentationMetricsFactory( + n_classes=self.n_classes + ) sample_mf.update( sample_pred, sample_label, sample_valid_mask ) @@ -617,10 +503,38 @@ def get_computational_cost( :type warm_up_runs: int, optional :return: Dictionary containing computational cost information """ + # Create dummy input dummy_input = torch.randn(1, 3, *image_size).to(self.device) - return get_computational_cost( - self.model, dummy_input, self.model_fname, runs, warm_up_runs - ) + + # Get model size if possible + if self.model_fname is not None: + size_mb = os.path.getsize(self.model_fname) / 1024**2 + else: + size_mb = None + + # Measure inference time with GPU synchronization + dummy_tuple = dummy_input if isinstance(dummy_input, tuple) else (dummy_input,) + + for _ in range(warm_up_runs): + self.inference(dummy_tuple[0]) + + inference_times = [] + for _ in range(runs): + torch.cuda.synchronize() + start_time = time.time() + self.inference(dummy_tuple[0]) + torch.cuda.synchronize() + end_time = time.time() + inference_times.append(end_time - start_time) + + result = { + "input_shape": ["x".join(map(str, ut.get_data_shape(dummy_input)))], + "n_params": [sum(p.numel() for p in self.model.parameters())], + "size_mb": [size_mb], + "inference_time_s": [np.mean(inference_times)], + } + + return pd.DataFrame.from_dict(result) class TorchLiDARSegmentationModel(dm_segmentation_model.LiDARSegmentationModel): @@ -655,6 +569,7 @@ def __init__( print("Model is not a TorchScript model. 
Loading as a PyTorch module.") model = torch.load(model, map_location=self.device) model_type = "native" + # Otherwise, check that it is a PyTorch module elif isinstance(model, torch.nn.Module): model_fname = None @@ -666,98 +581,83 @@ def __init__( super().__init__(model, model_type, model_cfg, ontology_fname, model_fname) self.model = self.model.to(self.device).eval() + # Init specific attributes and update model configuration + self.model_format = self.model_cfg["model_format"] + # Init model specific functions - if self.model_cfg["input_format"] == "o3d_randlanet": # Open3D RandLaNet - self.preprocess = tmu.preprocess - self.transform_input = tmu.o3d_randlanet.transform_input - self.update_probs = tmu.o3d_randlanet.update_probs - self.model_cfg["num_layers"] = sum(1 for _ in self.model.decoder.children()) - if self.model_cfg["input_format"] == "o3d_kpconv": # Open3D KPConv - self.preprocess = tmu.preprocess - self.transform_input = tmu.o3d_kpconv.transform_input - self.update_probs = tmu.o3d_kpconv.update_probs + model_format = self.model_format.split("_")[0] + model_utils_module_str = f"detectionmetrics.models.utils.{model_format}" + try: + model_utils_module = importlib.import_module(model_utils_module_str) + except ImportError: + raise_unknown_model_format_lidar(self.model_format) + self._get_sample = model_utils_module.get_sample + self._inference = model_utils_module.inference + if hasattr(model_utils_module, "reset_sampler"): + self._reset_sampler = model_utils_module.reset_sampler else: - self.preprocess = tmu.preprocess - self.transform_input = tmu.transform_input - self.update_probs = tmu.update_probs - - # Transformation for output labels - self.transform_output = ( - lambda x: torch.argmax(x.squeeze(), axis=-1).squeeze().to(torch.uint8) - ) + self._reset_sampler = None - def inference(self, points: np.ndarray) -> np.ndarray: - """Perform inference for a single point cloud + def inference( + self, + sample: dict, + model: torch.nn.Module, + model_cfg: dict, + measure_processing_time: bool = False, + ) -> Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]: + """Perform inference for a sample + + :param sample: Sample data + :type sample: dict + :param model: PyTorch model + :type model: torch.nn.Module + :param model_cfg: Dictionary containing model configuration + :type model_cfg: dict + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: tuple of (predictions, labels, names) and processing time dictionary (if measured) + :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]] + """ + return self._inference(sample, model, model_cfg, measure_processing_time) - :param points: Point cloud xyz array - :type points: np.ndarray - :return: Segmenation result as a point cloud with label indices - :rtype: np.ndarray + def predict( + self, + points_fname: str, + has_intensity: bool = True, + return_sample: bool = False, + ignore_index: Optional[List[int]] = None, + ) -> Union[np.ndarray, Tuple[np.ndarray, Any]]: + """Perform prediction for a single point cloud + + :param points_fname: Point cloud in SemanticKITTI .bin format + :type points_fname: str + :param has_intensity: Whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param return_sample: Whether to return the sample data along with predictions, defaults to False + :type return_sample: bool, optional + :param ignore_index: List of 
class indices to ignore during prediction, defaults to None + :type ignore_index: Optional[List[int]], optional + :return: Segmentation result as a numpy array or a tuple with the segmentation result and the input sample data + :rtype: Union[np.ndarray, Tuple[np.ndarray, Any]] """ # Preprocess point cloud - points, search_tree, projected_indices = self.preprocess(points, self.model_cfg) - - # Init sampler if needed - sampler = None - if "sampler" in self.model_cfg: - end_th = self.model_cfg.get("end_th", 0.5) - sampler = ul.Sampler( - points.shape[0], - search_tree, - self.model_cfg["sampler"], - self.n_classes, - ) - - # Iterate over the sampled point cloud until all points reach the end threshold. - # If no sampler is provided, the inference is performed in a single step. - infer_complete = False - while not infer_complete: - # Get model input data - input_data, selected_indices = self.transform_input( - points, self.model_cfg, sampler - ) - input_data = data_to_device(input_data, self.device) - if self.model_cfg["input_format"] != "o3d_kpconv": - input_data = unsqueeze_data(input_data) - - # Perform inference - with torch.no_grad(): - result = self.model(*input_data) - - # TODO: check if this is consistent across different models - if isinstance(result, dict): - result = result["out"] - - # Update probabilities if sampler is used - if sampler is not None: - if self.model_cfg["input_format"] == "o3d_kpconv": - sampler.test_probs = self.update_probs( - result, - selected_indices, - sampler.test_probs, - lengths=input_data[-1], - ) - else: - sampler.test_probs = self.update_probs( - result, - selected_indices, - sampler.test_probs, - self.n_classes, - ) - if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]: - result = sampler.test_probs[projected_indices] - infer_complete = True - else: - result = result.squeeze().cpu()[projected_indices].cuda() - infer_complete = True + sample = self._get_sample( + points_fname, self.model_cfg, has_intensity=has_intensity + ) + result, _, _ = self.inference(sample, self.model, self.model_cfg, ignore_index) + result = result.squeeze().cpu().numpy() - return self.transform_output(result).cpu().numpy() + if return_sample: + return result, sample + else: + return result def eval( self, dataset: dm_segmentation_dataset.LiDARSegmentationDataset, - split: str | List[str] = "test", + split: Union[str, List[str]] = "test", ontology_translation: Optional[str] = None, + translation_direction: str = "dataset_to_model", predictions_outdir: Optional[str] = None, results_per_sample: bool = False, ) -> pd.DataFrame: @@ -766,9 +666,11 @@ def eval( :param dataset: LiDAR segmentation dataset for which the evaluation will be performed :type dataset: LiDARSegmentationDataset :param split: Split or splits to be used from the dataset, defaults to "test" - :type split: str | List[str], optional + :type split: Union[str, List[str]], optional :param ontology_translation: JSON file containing translation between dataset and model output ontologies - :type ontology_translation: str, optional + :type ontology_translation: Optional[str], optional + :param translation_direction: Direction of the ontology translation, either 'dataset_to_model' or 'model_to_dataset', defaults to "dataset_to_model" + :type translation_direction: str, optional :param predictions_outdir: Directory to save predictions per sample, defaults to None. If None, predictions are not saved. 
:type predictions_outdir: Optional[str], optional :param results_per_sample: Whether to store results per sample or not, defaults to False. If True, predictions_outdir must be provided. @@ -787,75 +689,50 @@ def eval( os.makedirs(predictions_outdir, exist_ok=True) # Build a LUT for transforming ontology if needed - lut_ontology = self.get_lut_ontology(dataset.ontology, ontology_translation) - lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device) + eval_ontology = self.ontology + + if ontology_translation is not None: + ontology_translation = uio.read_json(ontology_translation) + if translation_direction == "dataset_to_model": + lut_ontology = uc.get_ontology_conversion_lut( + dataset.ontology, self.ontology, ontology_translation + ) + else: + eval_ontology = dataset.ontology + lut_ontology = uc.get_ontology_conversion_lut( + self.ontology, dataset.ontology, ontology_translation + ) + + lut_ontology = torch.tensor(lut_ontology, dtype=torch.int64).to(self.device) + else: + lut_ontology = None + + n_classes = len(eval_ontology) # Retrieve ignored label indices ignored_label_indices = [] for ignored_class in self.model_cfg.get("ignored_classes", []): ignored_label_indices.append(dataset.ontology[ignored_class]["idx"]) - # Get PyTorch dataset (no dataloader to avoid complexity with batching samplers) + # Get PyTorch dataloader dataset = LiDARSegmentationTorchDataset( dataset, - model_cfg=self.model_cfg, - preprocess=self.preprocess, - n_classes=self.n_classes, + self.model_cfg, + self._get_sample, splits=[split] if isinstance(split, str) else split, ) # Init metrics - metrics_factory = um.SegmentationMetricsFactory(self.n_classes) + metrics_factory = um.SegmentationMetricsFactory(n_classes) # Evaluation loop - end_th = self.model_cfg.get("end_th", 0.5) with torch.no_grad(): pbar = tqdm(dataset, total=len(dataset), leave=True) - for idx, points, projected_indices, (label, _), sampler in pbar: - # Iterate over the sampled point cloud until all points reach the end - # threshold. If no sampler is provided, the inference is performed in a - # single step. 
- infer_complete = False - while not infer_complete: - # Get model input data - input_data, selected_indices = self.transform_input( - points, self.model_cfg, sampler - ) - input_data = data_to_device(input_data, self.device) - if self.model_cfg["input_format"] != "o3d_kpconv": - input_data = unsqueeze_data(input_data) - - # Perform inference - pred = self.model(*input_data) - - # TODO: check if this is consistent across different models - if isinstance(pred, dict): - pred = pred["out"] - - if sampler is not None: - if self.model_cfg["input_format"] == "o3d_kpconv": - sampler.test_probs = self.update_probs( - pred, - selected_indices, - sampler.test_probs, - lengths=input_data[-1], - ) - else: - sampler.test_probs = self.update_probs( - pred, - selected_indices, - sampler.test_probs, - self.n_classes, - ) - if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]: - pred = sampler.test_probs[projected_indices] - infer_complete = True - else: - pred = pred.squeeze().cpu()[projected_indices].cuda() - infer_complete = True + for sample in pbar: + # Perform inference + pred, label, name = self.inference(sample, self.model, self.model_cfg) # Get valid points masks depending on ignored label indices - label = torch.tensor(label, device=self.device) if ignored_label_indices: valid_mask = torch.ones_like(label, dtype=torch.bool) for idx in ignored_label_indices: @@ -865,70 +742,118 @@ def eval( # Convert labels if needed if lut_ontology is not None: - label = lut_ontology[label] + if translation_direction == "dataset_to_model": + label = lut_ontology[label] + else: + pred = lut_ontology[pred] # Prepare data and update metrics factory - label = label.cpu().unsqueeze(0).numpy() - pred = self.transform_output(pred) - pred = pred.cpu().unsqueeze(0).to(torch.int64).numpy() + label = label.cpu().numpy() + pred = pred.cpu().numpy() if valid_mask is not None: - valid_mask = valid_mask.cpu().unsqueeze(0).numpy() + valid_mask = valid_mask.cpu().numpy() metrics_factory.update(pred, label, valid_mask) # Store predictions and results per sample if required if predictions_outdir is not None: - for i, (sample_idx, sample_pred, sample_label) in enumerate( - zip(idx, pred, label) + for i, (sample_name, sample_pred, sample_label) in enumerate( + zip(name, pred, label) ): if results_per_sample: sample_valid_mask = ( valid_mask[i] if valid_mask is not None else None ) - sample_mf = um.SegmentationMetricsFactory(n_classes=self.n_classes) + sample_mf = um.SegmentationMetricsFactory(n_classes) sample_mf.update( sample_pred, sample_label, sample_valid_mask ) sample_df = um.get_metrics_dataframe( - sample_mf, self.ontology + sample_mf, eval_ontology ) sample_df.to_csv( - os.path.join(predictions_outdir, f"{sample_idx}.csv") + os.path.join(predictions_outdir, f"{sample_name}.csv") ) pred.tofile( - os.path.join(predictions_outdir, f"{sample_idx}.bin") + os.path.join(predictions_outdir, f"{sample_name}.bin") ) - return um.get_metrics_dataframe(metrics_factory, self.ontology) + return um.get_metrics_dataframe(metrics_factory, eval_ontology) - def get_computational_cost(self, runs: int = 30, warm_up_runs: int = 5) -> dict: + def get_computational_cost( + self, + point_cloud_range: Tuple[int, int, int, int, int, int] = ( + -50, + -50, + -5, + 50, + 50, + 5, + ), + num_points: int = 100000, + has_intensity: bool = False, + runs: int = 30, + warm_up_runs: int = 5, + ) -> dict: """Get different metrics related to the computational cost of the model + :param point_cloud_range: Point cloud range (meters), defaults to (-50, 
-50, -5, 50, 50, 5) + :type point_cloud_range: Tuple[int, int, int, int, int, int], optional + :param num_points: Number of points in the point cloud, defaults to 100000 + :type num_points: int, optional + :param has_intensity: Whether the point cloud has intensity values, defaults to False + :type has_intensity: bool, optional :param runs: Number of runs to measure inference time, defaults to 30 :type runs: int, optional :param warm_up_runs: Number of warm-up runs, defaults to 5 :type warm_up_runs: int, optional :return: Dictionary containing computational cost information """ - # Build dummy input data (process is a bit complex for LiDAR models) - dummy_points = np.random.rand(1000000, 4) - dummy_points, search_tree, _ = self.preprocess(dummy_points, self.model_cfg) - - sampler = None - if "sampler" in self.model_cfg: - sampler = ul.Sampler( - point_cloud_size=dummy_points.shape[0], - search_tree=search_tree, - sampler_name=self.model_cfg["sampler"], - num_classes=self.n_classes, + # Build dummy point cloud using uniform distribution + dummy_points = np.random.uniform( + low=point_cloud_range[0:3], + high=point_cloud_range[3:6], + size=(num_points, 3 + int(has_intensity)), + ).astype(np.float32) + + # Store in a secure temporary .bin file + with tempfile.NamedTemporaryFile(suffix=".bin") as tmp_file: + dummy_points.tofile(tmp_file.name) + sample = self._get_sample( + tmp_file.name, self.model_cfg, has_intensity=has_intensity ) - dummy_input, _ = self.transform_input(dummy_points, self.model_cfg, sampler) - dummy_input = data_to_device(dummy_input, self.device) - if self.model_cfg["input_format"] != "o3d_kpconv": - dummy_input = unsqueeze_data(dummy_input) - - # Get computational cost - return get_computational_cost( - self.model, dummy_input, self.model_fname, runs, warm_up_runs - ) + # Get model size if possible + if self.model_fname is not None: + size_mb = os.path.getsize(self.model_fname) / 1024**2 + else: + size_mb = None + + # Measure inference time with GPU synchronization + for _ in range(warm_up_runs): + if "o3d" in self.model_format: # reset random sampling for Open3D-ML models + subsampled_points, _, sampler, _, _, _ = sample + self._reset_sampler(sampler, subsampled_points.shape[0], self.n_classes) + + self.inference(sample, self.model, self.model_cfg) + + inference_times = [] + for _ in range(runs): + if "o3d" in self.model_format: # reset random sampling for Open3D-ML models + subsampled_points, _, sampler, _, _, _ = sample + self._reset_sampler(sampler, subsampled_points.shape[0], self.n_classes) + torch.cuda.synchronize() + start_time = time.time() + self.inference(sample, self.model, self.model_cfg) + torch.cuda.synchronize() + end_time = time.time() + inference_times.append(end_time - start_time) + + result = { + "input_shape": ["x".join(map(str, ut.get_data_shape(dummy_points)))], + "n_params": [sum(p.numel() for p in self.model.parameters())], + "size_mb": [size_mb], + "inference_time_s": [np.mean(inference_times)], + } + + return pd.DataFrame.from_dict(result) diff --git a/detectionmetrics/models/utils/__init__.py b/detectionmetrics/models/utils/__init__.py new file mode 100644 index 00000000..a706d9f3 --- /dev/null +++ b/detectionmetrics/models/utils/__init__.py @@ -0,0 +1,19 @@ +try: + from detectionmetrics.models.utils import o3d +except ImportError: + pass + +try: + from detectionmetrics.models.utils import mmdet3d +except ImportError: + pass + +try: + from detectionmetrics.models.utils import lsk3dnet +except ImportError: + pass + +try: + from 
detectionmetrics.models.utils import sphereformer +except ImportError: + pass diff --git a/detectionmetrics/models/utils/lsk3dnet.py b/detectionmetrics/models/utils/lsk3dnet.py new file mode 100644 index 00000000..c5a99e5d --- /dev/null +++ b/detectionmetrics/models/utils/lsk3dnet.py @@ -0,0 +1,298 @@ +import time +from typing import List, Optional, Tuple + +from c_gen_normal_map import gen_normal_map +import numpy as np +import torch +import utils.depth_map_utils as depth_map_utils + +import detectionmetrics.utils.torch as ut +import detectionmetrics.utils.lidar as ul + + +def range_projection(current_vertex, fov_up=3.0, fov_down=-25.0, proj_H=64, proj_W=900): + """Project a pointcloud into a spherical projection (range image).""" + # laser parameters + fov_up = fov_up / 180.0 * np.pi # field of view up in radians + fov_down = fov_down / 180.0 * np.pi # field of view down in radians + fov = abs(fov_down) + abs(fov_up) # get field of view total in radians + + # get depth of all points + depth = np.linalg.norm(current_vertex[:, :3], 2, axis=1) + + # get scan components + scan_x = current_vertex[:, 0] + scan_y = current_vertex[:, 1] + scan_z = current_vertex[:, 2] + + # get angles of all points + yaw = -np.arctan2(scan_y, scan_x) + pitch = np.arcsin(scan_z / depth) + + # get projections in image coords + proj_x = 0.5 * (yaw / np.pi + 1.0) # in [0.0, 1.0] + proj_y = 1.0 - (pitch + abs(fov_down)) / fov # in [0.0, 1.0] + + # scale to image size using angular resolution + proj_x *= proj_W # in [0.0, W] + proj_y *= proj_H # in [0.0, H] + + # round and clamp for use as index + proj_x = np.floor(proj_x) + proj_x = np.minimum(proj_W - 1, proj_x) + proj_x = np.maximum(0, proj_x).astype(np.int32) # in [0,W-1] + from_proj_x = np.copy(proj_x) # store a copy in orig order + + proj_y = np.floor(proj_y) + proj_y = np.minimum(proj_H - 1, proj_y) + proj_y = np.maximum(0, proj_y).astype(np.int32) # in [0,H-1] + from_proj_y = np.copy(proj_y) # stope a copy in original order + + # order in decreasing depth + order = np.argsort(depth)[::-1] + depth = depth[order] + + proj_y = proj_y[order] + proj_x = proj_x[order] + + scan_x = scan_x[order] + scan_y = scan_y[order] + scan_z = scan_z[order] + + indices = np.arange(depth.shape[0]) + indices = indices[order] + + proj_range = np.full((proj_H, proj_W), -1, dtype=np.float32) + proj_vertex = np.full((proj_H, proj_W, 4), -1, dtype=np.float32) + proj_idx = np.full((proj_H, proj_W), -1, dtype=np.int32) + + proj_range[proj_y, proj_x] = depth + proj_vertex[proj_y, proj_x] = np.array( + [scan_x, scan_y, scan_z, np.ones(len(scan_x))] + ).T + proj_idx[proj_y, proj_x] = indices + + return proj_range, proj_vertex, from_proj_x, from_proj_y + + +def compute_normals_range( + current_vertex, proj_H=64, proj_W=900, extrapolate=True, blur_type="gaussian" +): + """Compute normals for each point using range image-based method.""" + proj_range, proj_vertex, from_proj_x, from_proj_y = range_projection(current_vertex) + proj_range = depth_map_utils.fill_in_fast( + proj_range, extrapolate=extrapolate, blur_type=blur_type + ) + + # generate normal image + normal_data = gen_normal_map(proj_range, proj_vertex, proj_H, proj_W) + unproj_normal_data = normal_data[from_proj_y, from_proj_x] + + return unproj_normal_data + + +def collate_fn(samples: List[dict]) -> dict: + """Collate function for batching samples + + :param samples: list of sample dictionaries + :type samples: List[dict] + :return: collated batch dictionary + :rtype: dict + """ + point_num = [d["point_num"] for d in samples] + 
batch_size = len(point_num) + ref_labels = samples[0]["ref_label"] + origin_len = samples[0]["origin_len"] + ref_indices = [torch.from_numpy(d["ref_index"]) for d in samples] + path = samples[0]["root"] # [d['root'] for d in data] + root = [d["root"] for d in samples] + sample_id = [d["sample_id"] for d in samples] + + b_idx = [] + for i in range(batch_size): + b_idx.append(torch.ones(point_num[i]) * i) + points = [torch.from_numpy(d["point_feat"]) for d in samples] + ref_xyz = [torch.from_numpy(d["ref_xyz"]) for d in samples] + + has_labels = samples[0]["point_label"] is not None + if has_labels: + labels = [torch.from_numpy(d["point_label"]) for d in samples] + else: + labels = [d["point_label"] for d in samples] + normal = [torch.from_numpy(d["normal"]) for d in samples] + + return { + "points": torch.cat(points).float(), + "normal": torch.cat(normal).float(), + "ref_xyz": torch.cat(ref_xyz).float(), + "batch_idx": torch.cat(b_idx).long(), + "batch_size": batch_size, + "labels": torch.cat(labels).long().squeeze(1) if has_labels else labels, + "raw_labels": torch.from_numpy(ref_labels).long() if has_labels else ref_labels, + "origin_len": origin_len, + "indices": torch.cat(ref_indices).long(), + "path": path, + "point_num": point_num, + "root": root, + "sample_id": sample_id, + } + + +def get_sample( + points_fname: str, + model_cfg: dict, + label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[dict, Optional[dict]]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data dictionary and processing time dictionary (if measured) + :rtype: Tuple[dict, Optional[dict]] + """ + raw_data = ul.read_semantickitti_points(points_fname, has_intensity) + + labels, ref_labels = None, None + if label_fname is not None: + labels, _ = ul.read_semantickitti_label(label_fname) + labels = labels.reshape((-1, 1)).astype(np.uint8) + ref_labels = labels.copy() + + if measure_processing_time: + start = time.perf_counter() + + xyz = raw_data[:, :3] + feat = raw_data[:, 3:4] if model_cfg["n_feats"] > 3 else None + origin_len = len(xyz) + + ref_pc = xyz.copy() + ref_index = np.arange(len(ref_pc)) + + mask_x = np.logical_and( + xyz[:, 0] > model_cfg["min_volume_space"][0], + xyz[:, 0] < model_cfg["max_volume_space"][0], + ) + mask_y = np.logical_and( + xyz[:, 1] > model_cfg["min_volume_space"][1], + xyz[:, 1] < model_cfg["max_volume_space"][1], + ) + mask_z = np.logical_and( + xyz[:, 2] > model_cfg["min_volume_space"][2], + xyz[:, 2] < model_cfg["max_volume_space"][2], + ) + mask = np.logical_and(mask_x, np.logical_and(mask_y, mask_z)) + + not_zero = np.logical_not(np.all(xyz[:, :3] == 0, axis=1)) + mask = np.logical_and(mask, not_zero) + + xyz = xyz[mask] + if labels is not None: + labels = 
labels[mask] + ref_index = ref_index[mask] + if feat is not None: + feat = feat[mask] + point_num = len(xyz) + + feat = np.concatenate((xyz, feat), axis=1) if feat is not None else xyz + + unproj_normal_data = compute_normals_range(feat) + + if measure_processing_time: + end = time.perf_counter() + processing_time = {"preprocessing": end - start} + + sample = {} + sample["point_feat"] = feat + sample["point_label"] = labels + sample["ref_xyz"] = ref_pc + sample["ref_label"] = ref_labels + sample["ref_index"] = ref_index + sample["point_num"] = point_num + sample["origin_len"] = origin_len + sample["normal"] = unproj_normal_data + sample["root"] = points_fname + sample["sample_id"] = name + sample["idx"] = idx + + if measure_processing_time: + return sample, processing_time + + return sample + + +def inference( + sample: dict, + model: torch.nn.Module, + model_cfg: dict, + ignore_index: Optional[List[int]] = None, + measure_processing_time: bool = False, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]]: + """Perform inference on a sample using an LSK3DNet model + + :param sample: sample data dictionary + :type sample: dict + :param model: LSK3DNet model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param ignore_index: list of class indices to ignore during inference, defaults to None + :type ignore_index: Optional[List[int]], optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: tuple of (predictions, labels, names) and processing time dictionary (if measured) + :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]] + """ + single_sample = not isinstance(sample["sample_id"], list) + if single_sample: + sample = collate_fn([sample]) + + device = next(model.parameters()).device + for k, v in sample.items(): + sample[k] = ut.data_to_device(v, device) + + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + pred = model(sample) + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time = {"inference_n_voxelization": end - start} + + if ignore_index is not None: + pred["logits"][:, ignore_index] = -1e9 + pred["logits"] = torch.argmax(pred["logits"], dim=1) + + has_labels = pred["labels"][0] is not None + preds, labels, names = ([], [], []) if has_labels else ([], None, None) + + for batch_idx in range(pred["batch_size"]): + preds.append(pred["logits"][pred["batch_idx"] == batch_idx]) + if has_labels: + labels.append(pred["labels"][pred["batch_idx"] == batch_idx]) + names.append(pred["sample_id"][batch_idx]) + + preds = torch.stack(preds, dim=0).squeeze() + if has_labels: + labels = torch.stack(labels, dim=0).squeeze() + + if measure_processing_time: + return (preds, labels, names), processing_time + + return preds, labels, names diff --git a/detectionmetrics/models/utils/mmdet3d.py b/detectionmetrics/models/utils/mmdet3d.py new file mode 100644 index 00000000..2dc6bea8 --- /dev/null +++ b/detectionmetrics/models/utils/mmdet3d.py @@ -0,0 +1,153 @@ +import time +from typing import List, Optional, Tuple + +from mmdet3d.datasets.transforms import ( + LoadPointsFromFile, + LoadAnnotations3D, + Pack3DDetInputs, +) +from mmengine.registry import FUNCTIONS +import torch +from torchvision.transforms import Compose + +COLLATE_FN = FUNCTIONS.get("pseudo_collate") + + +def get_sample( + points_fname: str, + model_cfg: dict, + 
label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[dict, Optional[dict]]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data and optionally processing time + :rtype: Tuple[ dict, Optional[dict] ] + """ + sample = { + "lidar_points": { + "lidar_path": points_fname, + "num_pts_feats": model_cfg.get("n_feats", 4), + }, + "pts_semantic_mask_path": label_fname, + "sample_id": name, + "sample_idx": idx, + "num_pts_feats": model_cfg.get("n_feats", 4), + "lidar_path": points_fname, + } + + n_feats = sample["num_pts_feats"] + load_dim = 4 if has_intensity else 3 + transforms = [ + LoadPointsFromFile(coord_type="LIDAR", load_dim=load_dim, use_dim=n_feats) + ] + if sample["pts_semantic_mask_path"] is not None: + transforms.append( + LoadAnnotations3D( + with_bbox_3d=False, + with_label_3d=False, + with_seg_3d=True, + seg_3d_dtype="np.uint32", + seg_offset=65536, + dataset_type="semantickitti", + ) + ) + transforms.append( + Pack3DDetInputs( + keys=["points", "pts_semantic_mask"], + meta_keys=["sample_idx", "lidar_path", "num_pts_feats", "sample_id"], + ) + ) + + if measure_processing_time: + start = time.perf_counter() + transforms = Compose(transforms) + sample = transforms(sample) + if measure_processing_time: + end = time.perf_counter() + return sample, {"preprocessing": end - start} + + return sample + + +def inference( + sample: dict, + model: torch.nn.Module, + model_cfg: dict, + ignore_index: Optional[List[int]] = None, + measure_processing_time: bool = False, +) -> Tuple[ + Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]], Optional[dict] +]: + """Perform inference on a sample using an mmdetection3D model + + :param sample: sample data dictionary + :type sample: dict + :param model: mmdetection3D model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :param ignore_index: list of class indices to ignore during inference, defaults to None + :type ignore_index: Optional[List[int]], optional + :return: predictions, labels (if available), sample names and optionally processing time + :rtype: Tuple[ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[List[str]]], Optional[dict] ] + """ + single_sample = not isinstance(sample["data_samples"], list) + if single_sample: + sample = COLLATE_FN([sample]) + + if measure_processing_time: + start = time.perf_counter() + sample = model.data_preprocessor(sample, training=False) + if measure_processing_time: + end = time.perf_counter() + processing_time = {"voxelization": end - start} + + inputs, data_samples = sample["inputs"], 
sample["data_samples"] + has_labels = hasattr(data_samples[0].gt_pts_seg, "pts_semantic_mask") + + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + outputs = model(inputs, data_samples, mode="predict") + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time["inference"] = end - start + + preds, labels, names = ([], [], []) if has_labels else ([], None, None) + for output in outputs: + if ignore_index is not None: + output.pts_seg_logits.pts_seg_logits[ignore_index] = -1e9 + pred = torch.argmax(output.pts_seg_logits.pts_seg_logits, dim=0) + preds.append(pred) + if has_labels: + labels.append(output.gt_pts_seg.pts_semantic_mask) + names.append(output.metainfo["sample_id"]) + preds = torch.stack(preds, dim=0).squeeze() + if has_labels: + labels = torch.stack(labels, dim=0).squeeze() + + if measure_processing_time: + return (preds, labels, names), processing_time + else: + return preds, labels, names diff --git a/detectionmetrics/models/utils/o3d/__init__.py b/detectionmetrics/models/utils/o3d/__init__.py new file mode 100644 index 00000000..945c3578 --- /dev/null +++ b/detectionmetrics/models/utils/o3d/__init__.py @@ -0,0 +1,216 @@ +import time +from typing import Optional, Tuple, Union, Dict + +import numpy as np +import torch + +try: + from open3d._ml3d.datasets.utils import DataProcessing +except Exception: + print("Open3D-ML3D not available") +from sklearn.neighbors import KDTree + +from detectionmetrics.models.utils.o3d import randlanet, kpconv +from detectionmetrics.utils import lidar as ul +import detectionmetrics.utils.torch as ut + + +def inference( + sample: Tuple[np.ndarray, np.ndarray, ul.Sampler], + model: torch.nn.Module, + model_cfg: dict, + measure_processing_time: bool = False, +) -> Union[ + Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str]], + Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str], Dict[str, float]], +]: + """Perform inference on a sample using an Open3D-ML model + + :param sample: sample data dictionary + :type sample: dict + :param model: Open3D-ML model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: predicted labels, ground truth labels, sample name and optionally processing time + :rtype: Union[ Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str]], Tuple[torch.Tensor, Optional[torch.Tensor], Optional[str], Dict[str, float]] ] + """ + infer_complete = False + points, projected_indices, sampler, label, name, _ = sample + model_format = model_cfg["model_format"] + end_th = model_cfg.get("end_th", 0.5) + + processing_time = {"preprocessing": 0, "inference": 0, "postprocessing": 0} + + if "kpconv" in model_format: + transform_input = kpconv.transform_input + update_probs = kpconv.update_probs + elif "randlanet" in model_format: + decoder_layers = model.decoder.children() + model_cfg["num_layers"] = sum(1 for _ in decoder_layers) + transform_input = randlanet.transform_input + update_probs = randlanet.update_probs + else: + raise ValueError(f"Unknown model type: {model_format}") + + while not infer_complete: + # Get model input data + if measure_processing_time: + start = time.perf_counter() + input_data, selected_indices = transform_input(points, model_cfg, sampler) + if measure_processing_time: + end = time.perf_counter() + processing_time["preprocessing"] += end - 
start + + input_data = ut.data_to_device(input_data, model.device) + if "randlanet" in model_format: + input_data = ut.unsqueeze_data(input_data) + + # Perform inference + with torch.no_grad(): + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + pred = model(*input_data) + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time["inference"] += end - start + + # TODO: check if this is consistent across different models + if isinstance(pred, dict): + pred = pred["out"] + + # Update probabilities if sampler is used + if measure_processing_time: + start = time.perf_counter() + if sampler is not None: + if "kpconv" in model_format: + sampler.test_probs = update_probs( + pred, + selected_indices, + sampler.test_probs, + lengths=input_data[-1], + ) + else: + sampler.test_probs = update_probs( + pred, + selected_indices, + sampler.test_probs, + model_cfg["n_classes"], + ) + if sampler.p[sampler.p > end_th].shape[0] == sampler.p.shape[0]: + pred = sampler.test_probs[projected_indices] + infer_complete = True + else: + pred = pred.squeeze().cpu()[projected_indices].cuda() + infer_complete = True + if measure_processing_time: + end = time.perf_counter() + processing_time["postprocessing"] += end - start + + if label is not None: + label = torch.from_numpy(label.astype(np.int64)).long().cuda() + + result = torch.argmax(pred.squeeze(), axis=-1), label, name + + # Return processing time if needed + if measure_processing_time: + return result, processing_time + + return result + + +def get_sample( + points_fname: str, + model_cfg: dict, + label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[ + Union[ + Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], + Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], + Dict[str, float], + ] +]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data and optionally processing time + :rtype: Union[ Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], Tuple[np.ndarray, np.ndarray, ul.Sampler, np.ndarray, str, int], Dict[str, float] ] + """ + points = ul.read_semantickitti_points(points_fname, has_intensity) + label = None + if label_fname is not None: + label, _ = ul.read_semantickitti_label(label_fname) + + if measure_processing_time: + start = time.perf_counter() + + # Keep only XYZ coordinates + points = np.array(points[:, 0:3], dtype=np.float32) + + # Subsample points using a grid of given size + grid_size = model_cfg.get("grid_size", 0.06) + sub_points = DataProcessing.grid_subsampling(points, grid_size=grid_size) + + # Create search tree so that we can project points back to the original point cloud + search_tree = 
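The sampler-based loop above keeps re-running the model on spatially regular crops and blends each crop's class probabilities into a per-point accumulator until every point passes the `end_th` confidence threshold. A small sketch of that blending step; the 0.95 weight mirrors the usual Open3D-ML behaviour but is an assumption here:

```python
import torch

def blend_probs(new_probs, indices, test_probs, weight=0.95):
    # new_probs: (n_selected, n_classes) softmax output for the current crop
    # indices:   (n_selected,) positions of those points in the full cloud
    test_probs[indices] = weight * test_probs[indices] + (1 - weight) * new_probs
    return test_probs

test_probs = torch.zeros(1000, 19)                   # accumulator for the full cloud
new_probs = torch.softmax(torch.randn(128, 19), -1)  # one crop's predictions
indices = torch.randperm(1000)[:128]                 # crop point indices
test_probs = blend_probs(new_probs, indices, test_probs)
```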
KDTree(sub_points) + projected_indices = np.squeeze(search_tree.query(points, return_distance=False)) + projected_indices = projected_indices.astype(np.int32) + + # Init sampler + sampler = None + if "sampler" in model_cfg: + sampler = ul.Sampler( + sub_points.shape[0], + search_tree, + model_cfg["sampler"], + model_cfg["n_classes"], + ) + + if measure_processing_time: + end = time.perf_counter() + + sample = sub_points, projected_indices, sampler, label, name, idx + + # Return processing time if needed + if measure_processing_time: + processing_time = {"preprocessing": end - start} + return sample, processing_time + + return sample + + +def reset_sampler(sampler: ul.Sampler, num_points: int, num_classes: int): + """Reset sampler object probabilities + + :param sampler: Sampler object + :type sampler: ul.Sampler + :param num_points: Number of points in the point cloud + :type num_points: int + :param num_classes: Number of semantic classes + :type num_classes: int + """ + sampler.p = np.random.rand(num_points) * 1e-3 + sampler.min_p = float(np.min(sampler.p[-1])) + sampler.test_probs = np.zeros((num_points, num_classes), dtype=np.float32) + return sampler diff --git a/detectionmetrics/models/torch_model_utils/o3d_kpconv.py b/detectionmetrics/models/utils/o3d/kpconv.py similarity index 99% rename from detectionmetrics/models/torch_model_utils/o3d_kpconv.py rename to detectionmetrics/models/utils/o3d/kpconv.py index 01a0ba29..00a64f28 100644 --- a/detectionmetrics/models/torch_model_utils/o3d_kpconv.py +++ b/detectionmetrics/models/utils/o3d/kpconv.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import List, Tuple import numpy as np diff --git a/detectionmetrics/models/torch_model_utils/o3d_randlanet.py b/detectionmetrics/models/utils/o3d/randlanet.py similarity index 99% rename from detectionmetrics/models/torch_model_utils/o3d_randlanet.py rename to detectionmetrics/models/utils/o3d/randlanet.py index 8caad287..1210b6a7 100644 --- a/detectionmetrics/models/torch_model_utils/o3d_randlanet.py +++ b/detectionmetrics/models/utils/o3d/randlanet.py @@ -109,4 +109,4 @@ def update_probs( test_probs = torch.tensor(test_probs, device=new_probs.device) test_probs[indices] = weight * test_probs[indices] + (1 - weight) * new_probs - return test_probs + return test_probs \ No newline at end of file diff --git a/detectionmetrics/models/utils/sphereformer.py b/detectionmetrics/models/utils/sphereformer.py new file mode 100644 index 00000000..1cc3a4af --- /dev/null +++ b/detectionmetrics/models/utils/sphereformer.py @@ -0,0 +1,206 @@ +import time +from typing import List, Optional, Tuple + +import numpy as np +import spconv.pytorch as spconv +import torch +from util.data_util import data_prepare + +import detectionmetrics.utils.torch as ut +import detectionmetrics.utils.lidar as ul + + +def collate_fn(samples: List[dict]) -> dict: + """Collate function for batching samples + + :param samples: list of sample dictionaries + :type samples: List[dict] + :return: collated batch dictionary + :rtype: dict + """ + coords, xyz, feats, labels, inds_recons, fnames, sample_ids = list(zip(*samples)) + inds_recons = list(inds_recons) + + accmulate_points_num = 0 + offset = [] + for i in range(len(coords)): + inds_recons[i] = accmulate_points_num + inds_recons[i] + accmulate_points_num += coords[i].shape[0] + offset.append(accmulate_points_num) + + coords = torch.cat(coords) + xyz = torch.cat(xyz) + feats = torch.cat(feats) + if any(label is None for label in labels): + labels = None + else: + 
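`get_sample` above subsamples the point cloud on a regular grid for the network and builds a KD-tree so that per-point predictions can later be projected back to the original resolution. A simplified sketch of that scheme; the crude voxel deduplication below only stands in for Open3D-ML's `DataProcessing.grid_subsampling`:

```python
import numpy as np
from sklearn.neighbors import KDTree

points = np.random.rand(10000, 3).astype(np.float32)   # full-resolution cloud
grid_size = 0.06

# crude stand-in for grid subsampling: keep one point per occupied voxel
_, keep = np.unique(
    np.floor(points / grid_size).astype(np.int64), axis=0, return_index=True
)
sub_points = points[keep]

# map every original point to its nearest subsampled point
search_tree = KDTree(sub_points)
projected_indices = np.squeeze(
    search_tree.query(points, return_distance=False)
).astype(np.int32)

sub_pred = np.random.randint(0, 19, len(sub_points))    # predictions at reduced resolution
full_pred = sub_pred[projected_indices]                  # projected back to all points
```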
labels = torch.cat(labels) + offset = torch.IntTensor(offset) + inds_recons = torch.cat(inds_recons) + + return ( + coords, + xyz, + feats, + labels, + offset, + inds_recons, + list(fnames), + list(sample_ids), + ) + + +def get_sample( + points_fname: str, + model_cfg: dict, + label_fname: Optional[str] = None, + name: Optional[str] = None, + idx: Optional[int] = None, + has_intensity: bool = True, + measure_processing_time: bool = False, +) -> Tuple[dict, Optional[dict]]: + """Get sample data for mmdetection3d models + + :param points_fname: filename of the point cloud + :type points_fname: str + :param model_cfg: model configuration + :type model_cfg: dict + :param label_fname: filename of the semantic label, defaults to None + :type label_fname: Optional[str], optional + :param name: sample name, defaults to None + :type name: Optional[str], optional + :param idx: sample numerical index, defaults to None + :type idx: Optional[int], optional + :param has_intensity: whether the point cloud has intensity values, defaults to True + :type has_intensity: bool, optional + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :return: sample data dictionary and processing time dictionary (if measured) + :rtype: Tuple[dict, Optional[dict]] + """ + feats = ul.read_semantickitti_points(points_fname, has_intensity) + feats = feats[:, : model_cfg["n_feats"]] + + labels_in = None + if label_fname is not None: + annotated_data = np.fromfile(label_fname, dtype=np.uint32) + annotated_data = annotated_data.reshape((-1, 1)) + labels_in = annotated_data.astype(np.uint8).reshape(-1) + + if measure_processing_time: + start = time.perf_counter() + + xyz = feats[:, :3] + xyz = np.clip(xyz, model_cfg["pc_range"][0], model_cfg["pc_range"][1]) + + coords, xyz, feats, labels, inds_reconstruct = data_prepare( + xyz, + feats, + labels_in, + "test", + np.array(model_cfg["voxel_size"]), + model_cfg["voxel_max"], + None, + model_cfg["xyz_norm"], + ) + + if measure_processing_time: + end = time.perf_counter() + processing_time = {"voxelization": end - start} + + sample = ( + coords, + xyz, + feats, + labels, + inds_reconstruct, + points_fname, + name, + ) + + if measure_processing_time: + return sample, processing_time + + return sample + + +def inference( + sample: dict, + model: torch.nn.Module, + model_cfg: dict, + ignore_index: Optional[List[int]] = None, + measure_processing_time: bool = False, +) -> Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]]: + """Perform inference on a sample using an SphereFormer model + + :param sample: sample data dictionary + :type sample: dict + :param model: SphereFormer model + :type model: torch.nn.Module + :param model_cfg: model configuration + :type model_cfg: dict + :param measure_processing_time: whether to measure processing time, defaults to False + :type measure_processing_time: bool, optional + :param ignore_index: list of class indices to ignore during inference, defaults to None + :type ignore_index: Optional[List[int]], optional + :return: tuple of (predictions, labels, names) and processing time dictionary (if measured) + :rtype: Tuple[Tuple[torch.Tensor, Optional[torch.Tensor], List[str]], Optional[dict]] + """ + single_sample = not isinstance(sample[-1], list) + if single_sample: + sample = collate_fn([sample]) + + device = next(model.parameters()).device + sample = ut.data_to_device(sample, device) + + ( + coord, + xyz, + feat, + labels, + offset, + 
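The SphereFormer collate/inference pair tracks batch membership through cumulative point counts (`offset`) rather than padding. A tiny sketch of how the inference code turns those offsets back into a per-point batch index:

```python
import torch

counts = [4, 3]                                  # points per sample in a batch of two
offset = torch.tensor(counts).cumsum(0).int()    # tensor([4, 7]), as built in collate_fn

offset_ = offset.clone()
offset_[1:] = offset_[1:] - offset_[:-1]         # cumulative counts -> per-sample counts

batch = torch.cat(
    [torch.tensor([i] * int(n)) for i, n in enumerate(offset_)], 0
).long()
print(batch)                                     # tensor([0, 0, 0, 0, 1, 1, 1])
```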
inds_reconstruct, + fnames, + names, + ) = sample + + if measure_processing_time: + start = time.perf_counter() + + offset_ = offset.clone() + offset_[1:] = offset_[1:] - offset_[:-1] + + batch = ( + torch.cat([torch.tensor([ii] * o) for ii, o in enumerate(offset_)], 0) + .long() + .to(device) + ) + + coord = torch.cat([batch.unsqueeze(-1), coord], -1) + spatial_shape = np.clip((coord.max(0)[0][1:] + 1).cpu().numpy(), 128, None) + batch_size = len(fnames) + + sinput = spconv.SparseConvTensor(feat, coord.int(), spatial_shape, batch_size) + if measure_processing_time: + end = time.perf_counter() + processing_time = {"preprocessing": end - start} + start = time.perf_counter() + + if measure_processing_time: + torch.cuda.synchronize() + start = time.perf_counter() + preds = model(sinput, xyz, batch) + if measure_processing_time: + torch.cuda.synchronize() + end = time.perf_counter() + processing_time["inference"] = end - start + + preds = preds[inds_reconstruct, :] + if ignore_index is not None: + preds[:, ignore_index] = -1e9 + preds = torch.argmax(preds, dim=1) + + if measure_processing_time: + return (preds, labels, names), processing_time + + return preds, labels, names diff --git a/detectionmetrics/utils/conversion.py b/detectionmetrics/utils/conversion.py index 9cf8e9ce..1518aec9 100644 --- a/detectionmetrics/utils/conversion.py +++ b/detectionmetrics/utils/conversion.py @@ -57,7 +57,8 @@ def get_ontology_conversion_lut( old_ontology: dict, new_ontology: dict, ontology_translation: Optional[dict] = None, - ignored_classes: Optional[List[str]] = None, + classes_to_remove: Optional[List[str]] = None, + lut_dtype: Optional[np.dtype] = np.uint8, ) -> np.ndarray: """Build a LUT that links old ontology and new ontology indices. If class names don't match between the provided ontologies, user must provide an ontology @@ -69,18 +70,20 @@ def get_ontology_conversion_lut( :type new_ontology: dict :param ontology_translation: Ontology translation dictionary, defaults to None :type ontology_translation: Optional[dict], optional - :param ignored_classes: Classes to ignore from the old ontology, defaults to None - :type ignored_classes: Optional[List[str]], optional + :param classes_to_remove: Classes to be removed from the old ontology, defaults to None + :type classes_to_remove: Optional[List[str]], optional + :param lut_dtype: Type for the ontology conversion LUT, defaults to np.uint8 + :type lut_dtype: Optional[np.dtype], optional :return: numpy array associating old and new ontology indices :rtype: np.ndarray """ - ignored_classes = [] if ignored_classes is None else ignored_classes + classes_to_remove = [] if classes_to_remove is None else classes_to_remove max_idx = max(class_data["idx"] for class_data in old_ontology.values()) - lut = np.zeros((max_idx + 1), dtype=np.uint8) + lut = np.zeros((max_idx + 1), dtype=lut_dtype) if ontology_translation is not None: - # Deleting ignored classes that exist in ontology_translation - for class_name in ignored_classes: + # Deleting requested classes from ontology translation + for class_name in classes_to_remove: if class_name in ontology_translation: del ontology_translation[class_name] @@ -91,7 +94,8 @@ def get_ontology_conversion_lut( lut[old_class_idx] = new_class_idx else: old_ontology = old_ontology.copy() - for class_name in ignored_classes: # Deleting ignored classes from old_ontology + # Deleting classes requested from old ontology + for class_name in classes_to_remove: del old_ontology[class_name] assert set(old_ontology.keys()) == set( # 
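The renamed `classes_to_remove`/`lut_dtype` parameters feed the same LUT-based remapping as before: the resulting array is indexed with old-ontology labels to obtain new-ontology labels. A small sketch with made-up ontologies; only the `idx` field is taken from the converter's expected format, everything else is illustrative:

```python
import numpy as np

old_ontology = {"grass": {"idx": 0}, "tree": {"idx": 1}, "void": {"idx": 2}}
new_ontology = {"vegetation": {"idx": 0}, "other": {"idx": 1}}
translation = {"grass": "vegetation", "tree": "vegetation", "void": "other"}

# build the LUT the same way get_ontology_conversion_lut does when a
# translation dictionary is given
lut = np.zeros(max(c["idx"] for c in old_ontology.values()) + 1, dtype=np.uint8)
for old_name, new_name in translation.items():
    lut[old_ontology[old_name]["idx"]] = new_ontology[new_name]["idx"]

old_labels = np.array([0, 1, 2, 1])
print(lut[old_labels])   # [0 0 1 0] -> labels expressed in the new ontology
```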
Checking ontology compatibility new_ontology.keys() diff --git a/detectionmetrics/utils/lidar.py b/detectionmetrics/utils/lidar.py index 21331782..3d2b4226 100644 --- a/detectionmetrics/utils/lidar.py +++ b/detectionmetrics/utils/lidar.py @@ -1,6 +1,6 @@ import numpy as np import random -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union import open3d as o3d from PIL import Image @@ -13,7 +13,21 @@ "front": np.array([1, 0, 0.5], dtype=np.float32), # Camera front vector "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Point camera looks at "up": np.array([-0.5, 0, 1], dtype=np.float32), # Camera up direction - } + }, + "top": { + "zoom": 0.025, + "front": np.array([0, 0, -1], dtype=np.float32), # Looking straight down + "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Same target point + "up": np.array([0, 1, 0], dtype=np.float32), # Y axis is "up" in image + }, + "side": { + "zoom": 0.012, + "front": np.array( + [0, -1, 0], dtype=np.float32 + ), # Looking from positive Y toward origin + "lookat": np.array([1, 0.0, 0.0], dtype=np.float32), # Same target point + "up": np.array([0, 0, 1], dtype=np.float32), # Z axis is up + }, } @@ -211,11 +225,13 @@ def view_point_cloud(points: np.ndarray, colors: np.ndarray): def render_point_cloud( points: np.ndarray, colors: np.ndarray, - camera_view: str = "3rd_person", + camera_view: Union[str, dict] = "3rd_person", bg_color: Optional[List[float]] = [0.0, 0.0, 0.0, 1.0], color_jitter: float = 0.05, point_size: float = 3.0, resolution: Tuple[int, int] = (1920, 1080), + render_origin: bool = False, + origin_size: float = 0.5, ) -> Image: """Render a given point cloud from a specific camera view and return the image @@ -223,8 +239,8 @@ def render_point_cloud( :type points: np.ndarray :param colors: Colors for the point cloud data :type colors: np.ndarray - :param camera_view: Camera view, defaults to "3rd_person" - :type camera_view: str, optional + :param camera_view: Camera view (either ID or dictionary containing camera definition), defaults to "3rd_person" + :type camera_view: Union[str, dict], optional :param bg_color: Background color, defaults to black -> [0., 0., 0., 1.] 
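`render_point_cloud` now also accepts a free-form camera dictionary in addition to the predefined view names ("3rd_person", "top", "side"). A hedged usage sketch; the numeric values are illustrative, and the rendering call is left commented because it needs Open3D's offscreen renderer:

```python
import numpy as np

custom_view = {
    "zoom": 0.02,
    "front": np.array([0, 0, -1], dtype=np.float32),   # looking straight down
    "lookat": np.array([0, 0, 0], dtype=np.float32),
    "up": np.array([0, 1, 0], dtype=np.float32),
}

# from detectionmetrics.utils.lidar import render_point_cloud
# image = render_point_cloud(points, colors, camera_view=custom_view,
#                            render_origin=True, origin_size=0.5)
```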
:type bg_color: Optional[List[float]], optional :param color_jitter: Jitters the colors by a random value between [-color_jitter, color_jitter], defaults to 0.05 @@ -233,11 +249,20 @@ def render_point_cloud( :type point_size: float, optional :param resolution: Render resolution, defaults to (1920, 1080) :type resolution: Tuple[int, int], optional + :param render_origin: Whether to render the origin axes, defaults to False + :type render_origin: bool, optional + :param origin_size: Size of the origin axes, defaults to 0.5 + :type origin_size: float, optional :return: Rendered point cloud :rtype: Image """ - assert camera_view in CAMERA_VIEWS, f"Camera view {camera_view} not implemented" - view_settings = CAMERA_VIEWS[camera_view] + if isinstance(camera_view, dict): + # If camera_view is a dictionary, use it directly + view_settings = camera_view + elif isinstance(camera_view, str): + # If camera_view is a string, look it up in predefined views + assert camera_view in CAMERA_VIEWS, f"Camera view {camera_view} not implemented" + view_settings = CAMERA_VIEWS[camera_view] # Add color jitter if needed if color_jitter > 0: @@ -258,6 +283,15 @@ def render_point_cloud( material.point_size = point_size renderer.scene.add_geometry("point_cloud", point_cloud, material) + # Add origin axes for reference + if render_origin: + coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame( + size=origin_size, origin=[0, 0, 0] + ) + coord_material = o3d.visualization.rendering.MaterialRecord() + coord_material.shader = "defaultUnlit" # Also unlit for visibility + renderer.scene.add_geometry("coordinate_frame", coord_frame, coord_material) + # Set the background color renderer.scene.set_background(bg_color) @@ -280,3 +314,36 @@ def render_point_cloud( renderer.scene.clear_geometry() return image + + +def read_semantickitti_points(fname: str, has_intensity: bool = True) -> np.ndarray: + """Read points from a binary file in SemanticKITTI format + + :param fname: Binary file containing points + :type fname: str + :param has_intensity: Whether the points have intensity values, defaults to True + :type has_intensity: bool + :return: Numpy array containing points + :rtype: np.ndarray + """ + points = np.fromfile(fname, dtype=np.float32) + points = points.reshape((-1, 4 if has_intensity else 3)) + if not has_intensity: + empty_intensity = np.zeros((points.shape[0], 1), dtype=np.float32) + points = np.concatenate([points, empty_intensity], axis=1) + return points + + +def read_semantickitti_label(fname: str) -> Tuple[np.ndarray, np.ndarray]: + """Read labels from a binary file in SemanticKITTI format + + :param fname: Binary file containing labels + :type fname: str + :return: Numpy arrays containing semantic and instance labels + :rtype: Tuple[np.ndarray, np.ndarray] + """ + label = np.fromfile(fname, dtype=np.uint32) + label = label.reshape((-1)) + semantic_label = label & 0xFFFF + instance_label = label >> 16 + return semantic_label, instance_label diff --git a/detectionmetrics/utils/segmentation_metrics.py b/detectionmetrics/utils/segmentation_metrics.py index a3d7ff6b..18652f35 100644 --- a/detectionmetrics/utils/segmentation_metrics.py +++ b/detectionmetrics/utils/segmentation_metrics.py @@ -1,6 +1,6 @@ from collections import defaultdict import math -from typing import Optional +from typing import List, Optional, Union import numpy as np import pandas as pd @@ -50,10 +50,6 @@ def update( if valid_mask is not None: mask &= valid_mask - # Update confusion matrix - if np.count_nonzero(gt >= 16): - pass - # 
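`read_semantickitti_label` above relies on SemanticKITTI packing the semantic class into the lower 16 bits of each uint32 label and the instance id into the upper 16 bits. A tiny sketch of that unpacking with hand-built values:

```python
import numpy as np

# two packed labels: (instance 7, class 40) and (instance 3, class 48)
packed = np.array([(7 << 16) | 40, (3 << 16) | 48], dtype=np.uint32)

semantic = packed & 0xFFFF   # -> [40, 48]
instance = packed >> 16      # -> [ 7,  3]
print(semantic, instance)
```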
Update confusion matrix new_entry = np.bincount( self.n_classes * gt[mask].astype(int) + pred[mask].astype(int), @@ -61,11 +57,11 @@ def update( ) self.confusion_matrix += new_entry.reshape(self.n_classes, self.n_classes) - def get_metric_names(self) -> list[str]: + def get_metric_names(self) -> List[str]: """Get available metric names :return: List of available metric names - :rtype: list[str] + :rtype: List[str] """ return self.METRIC_NAMES @@ -77,58 +73,58 @@ def get_confusion_matrix(self) -> np.ndarray: """ return self.confusion_matrix - def get_tp(self, per_class: bool = True) -> np.ndarray | int: + def get_tp(self, per_class: bool = True) -> Union[np.ndarray, int]: """True Positives :param per_class: Return per class TP, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ tp = np.diag(self.confusion_matrix) return tp if per_class else int(np.nansum(tp)) - def get_fp(self, per_class: bool = True) -> np.ndarray | int: + def get_fp(self, per_class: bool = True) -> Union[np.ndarray, int]: """False Positives :param per_class: Return per class FP, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ fp = self.confusion_matrix.sum(axis=0) - np.diag(self.confusion_matrix) return fp if per_class else int(np.nansum(fp)) - def get_fn(self, per_class: bool = True) -> np.ndarray | int: + def get_fn(self, per_class: bool = True) -> Union[np.ndarray, int]: """False negatives :param per_class: Return per class FN, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ fn = self.confusion_matrix.sum(axis=1) - np.diag(self.confusion_matrix) return fn if per_class else int(np.nansum(fn)) - def get_tn(self, per_class: bool = True) -> np.ndarray | int: + def get_tn(self, per_class: bool = True) -> Union[np.ndarray, int]: """True negatives :param per_class: Return per class TN, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, int] """ total = self.confusion_matrix.sum() tn = total - (self.get_tp() + self.get_fp() + self.get_fn()) return tn if per_class else int(np.nansum(tn)) - def get_precision(self, per_class: bool = True) -> np.ndarray | float: + def get_precision(self, per_class: bool = True) -> Union[np.ndarray, float]: """Precision = TP / (TP + FP) :param per_class: Return per class precision, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fp = self.get_fp(per_class) @@ -139,13 +135,13 @@ def get_precision(self, per_class: bool = True) -> np.ndarray | float: else: return np.where(denominator > 0, tp / denominator, np.nan) - def get_recall(self, per_class: bool = True) -> np.ndarray | float: + def get_recall(self, per_class: bool = True) -> Union[np.ndarray, float]: """Recall = TP / (TP + FN) :param per_class: Return per class recall, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fn = self.get_fn(per_class) @@ -156,13 +152,13 @@ def get_recall(self, per_class: bool = True) -> np.ndarray | float: else: return np.where(denominator > 0, tp / denominator, np.nan) - def get_accuracy(self, per_class: bool = True) -> np.ndarray | float: + def 
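The confusion-matrix update above uses the usual bincount trick: each (ground truth, prediction) pair is encoded as `gt * n_classes + pred`, counted in a single pass, and reshaped into an `(n_classes, n_classes)` matrix. A compact worked sketch:

```python
import numpy as np

n_classes = 3
gt = np.array([0, 0, 1, 2, 2])
pred = np.array([0, 1, 1, 2, 0])

cm = np.bincount(
    n_classes * gt + pred, minlength=n_classes**2
).reshape(n_classes, n_classes)

print(cm)   # rows: ground truth, columns: prediction
# [[1 1 0]
#  [0 1 0]
#  [1 0 1]]
```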
get_accuracy(self, per_class: bool = True) -> Union[np.ndarray, float]: """Accuracy = (TP + TN) / (TP + FP + FN + TN) :param per_class: Return per class accuracy, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fp = self.get_fp(per_class) @@ -175,13 +171,13 @@ def get_accuracy(self, per_class: bool = True) -> np.ndarray | float: else: return np.where(total > 0, (tp + tn) / total, np.nan) - def get_f1_score(self, per_class: bool = True) -> np.ndarray | float: + def get_f1_score(self, per_class: bool = True) -> Union[np.ndarray, float]: """F1-score = 2 * (Precision * Recall) / (Precision + Recall) :param per_class: Return per class F1 score, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ precision = self.get_precision(per_class) recall = self.get_recall(per_class) @@ -196,13 +192,13 @@ def get_f1_score(self, per_class: bool = True) -> np.ndarray | float: denominator > 0, 2 * (precision * recall) / denominator, np.nan ) - def get_iou(self, per_class: bool = True) -> np.ndarray | float: + def get_iou(self, per_class: bool = True) -> Union[np.ndarray, float]: """IoU = TP / (TP + FP + FN) :param per_class: Return per class IoU, defaults to True :type per_class: bool, optional :return: True Positives - :rtype: np.ndarray | int + :rtype: Union[np.ndarray, float] """ tp = self.get_tp(per_class) fp = self.get_fp(per_class) @@ -242,7 +238,7 @@ def get_averaged_metric( def get_metric_per_name( self, metric_name: str, per_class: bool = True - ) -> np.ndarray | float | int: + ) -> Union[np.ndarray, float, int]: """Get metric value by name :param metric_name: Name of the metric to compute @@ -250,7 +246,7 @@ def get_metric_per_name( :param per_class: Return per class metric, defaults to True :type per_class: bool, optional :return: Metric value - :rtype: np.ndarray | float | int + :rtype: Union[np.ndarray, float, int] """ return getattr(self, f"get_{metric_name}")(per_class=per_class) diff --git a/detectionmetrics/utils/torch.py b/detectionmetrics/utils/torch.py new file mode 100644 index 00000000..ecb7a633 --- /dev/null +++ b/detectionmetrics/utils/torch.py @@ -0,0 +1,65 @@ +from typing import Union + +import torch + + +def data_to_device( + data: Union[tuple, list], device: torch.device +) -> Union[tuple, list]: + """Move provided data to given device (CPU or GPU) + + :param data: Data provided (it can be a single or multiple tensors) + :type data: Union[tuple, list] + :param device: Device to move data to + :type device: torch.device + :return: Data moved to device + :rtype: Union[tuple, list] + """ + if isinstance(data, (tuple, list)): + return type(data)( + d.to(device) if torch.is_tensor(d) else data_to_device(d, device) + for d in data + ) + elif torch.is_tensor(data): + return data.to(device) + else: + return data + + +def get_data_shape(data: Union[tuple, list]) -> Union[tuple, list]: + """Get the shape of the provided data + + :param data: Data provided (it can be a single or multiple tensors) + :type data: Union[tuple, list] + :return: Data shape + :rtype: Union[tuple, list] + """ + if isinstance(data, (tuple, list)): + return type(data)( + tuple(d.shape) if torch.is_tensor(d) else get_data_shape(d) for d in data + ) + elif torch.is_tensor(data): + return tuple(data.shape) + else: + return tuple(data.shape) + + +def unsqueeze_data(data: Union[tuple, list], dim: int = 0) -> Union[tuple, 
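A short usage sketch for the nested-container helpers in the new `detectionmetrics/utils/torch.py` module, with a CPU fallback so it also runs without a GPU:

```python
import torch
import detectionmetrics.utils.torch as ut

data = (torch.zeros(3), [torch.ones(2, 2), torch.arange(4)])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

moved = ut.data_to_device(data, device)   # container structure is preserved
print(ut.get_data_shape(moved))           # ((3,), [(2, 2), (4,)])

batched = ut.unsqueeze_data(moved)        # adds a leading batch dimension
print(ut.get_data_shape(batched))         # ((1, 3), [(1, 2, 2), (1, 4)])
```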
list]: + """Unsqueeze provided data along given dimension + + :param data: Data provided (it can be a single or multiple tensors) + :type data: Union[tuple, list] + :param dim: Dimension that will be unsqueezed, defaults to 0 + :type dim: int, optional + :return: Unsqueezed data + :rtype: Union[tuple, list] + """ + if isinstance(data, (tuple, list)): + return type(data)( + d.unsqueeze(dim) if torch.is_tensor(d) else unsqueeze_data(d, dim) + for d in data + ) + elif torch.is_tensor(data): + return data.unsqueeze(dim) + else: + return data diff --git a/docs/_pages/home.md b/docs/_pages/home.md index 6ebb9570..c44f7b39 100644 --- a/docs/_pages/home.md +++ b/docs/_pages/home.md @@ -49,8 +49,8 @@ Now, we're excited to introduce ***DetectionMetrics v2***! While retaining the f LiDAR - Rellis3D, GOOSE, custom GAIA format - PyTorch (tested with RandLA-Net and KPConv from Open3D-ML) + Rellis3D, GOOSE, WildScenes, custom GAIA format + PyTorch (tested with Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models) Object detection Image diff --git a/docs/_pages/v2/compatibility.md b/docs/_pages/v2/compatibility.md index 59b127cb..6db61a15 100644 --- a/docs/_pages/v2/compatibility.md +++ b/docs/_pages/v2/compatibility.md @@ -9,8 +9,10 @@ sidebar: ## Image semantic segmentation - Datasets: + - **[RUGD](http://rugd.vision/)** - **[Rellis3D](https://www.unmannedlab.org/research/RELLIS-3D)** - **[GOOSE](https://goose-dataset.de/)** + - **[WildScenes](https://csiro-robotics.github.io/WildScenes/)** - **Custom GAIA format**: *Parquet* file containing samples and labels relative paths and a JSON file with the dataset ontology. - **Generic**: simply assumes a different directory per split, different suffixes for samples and labels, and a JSON file containing the dataset ontology. - Models: @@ -52,36 +54,115 @@ sidebar: - Datasets: - **[Rellis3D](https://www.unmannedlab.org/research/RELLIS-3D)** - **[GOOSE](https://goose-dataset.de/)** + - **[WildScenes](https://csiro-robotics.github.io/WildScenes/)** - **Custom GAIA format**: *Parquet* file containing samples and labels relative paths and a JSON file with the dataset ontology. - **Generic**: simply assumes a different directory per split, different suffixes for samples and labels, and a JSON file containing the dataset ontology. - Models: - - **PyTorch ([TorchScript](https://pytorch.org/docs/stable/jit.html) compiled format and native modules)**. As of now, we have tested RandLA-Net and KPConv from [Open3D-ML](https://github.com/isl-org/Open3D-ML). + - **PyTorch ([TorchScript](https://pytorch.org/docs/stable/jit.html) compiled format and native modules)**. As of now, we have tested Open3D-ML, mmdetection3d, SphereFormer, and LSK3DNet models. - Input shape: defined by the `input_format` tag. 
- Output shape: `(num_points)` - - JSON configuration file format: + - JSON configuration file format examples (different depending on the model): ```json { - "seed": 42, - "input_format": "o3d_randlanet", + "model_format": <"o3d_randlanet" | "o3d_kpconv" | "mmdet3d" | "sphereformer" | "lsk3dnet">, + "n_feats": <3|4>, // without/with intensity + "seed": , + // -- EXTRA PARAMETERS PER MODEL (EXAMPLES) -- + // o3d kpconv "sampler": "spatially_regular", + "min_in_points": 10000, + "max_in_points": 20000, + "in_radius": 4.0, "recenter": { "dims": [ 0, - 1 + 1, + 2 ] }, - "ignored_classes": [ - "void" + "first_subsampling_dl": 0.075, + "conv_radius": 2.5, + "architecture": [ + "simple", + "resnetb", + "resnetb_strided", + "resnetb", + "resnetb", + "resnetb_strided", + "resnetb", + "resnetb", + "resnetb_strided", + "resnetb", + "resnetb", + "resnetb_strided", + "resnetb", + "nearest_upsample", + "unary", + "nearest_upsample", + "unary", + "nearest_upsample", + "unary", + "nearest_upsample", + "unary" ], + "num_layers": 5, + "num_points": 45056, + "grid_size": 0.075, + "num_neighbors": 16, + "sub_sampling_ratio": [ + 4, + 4, + 4, + 4 + ], + // o3d randlanet + "sampler": "spatially_regular", + "recenter": { + "dims": [ + 0, + 1 + ] + }, "num_points": 45056, - "grid_size": 0.06, + "grid_size": 0.075, "num_neighbors": 16, "sub_sampling_ratio": [ 4, 4, 4, 4 + ], + // sphereformer + "voxel_size": [ + 0.05, + 0.05, + 0.05 + ], + "voxel_max": 120000, + "pc_range": [ + [ + -22, + -17, + -4 + ], + [ + 30, + 18, + 13 + ] + ], + "xyz_norm": false, + // lsk3dnet + "min_volume_space": [ + -120, + -120, + -6 + ], + "max_volume_space": [ + 120, + 120, + 11 ] } ``` diff --git a/docs/_pages/v2/installation.md b/docs/_pages/v2/installation.md index 2169365b..a5a1f12a 100644 --- a/docs/_pages/v2/installation.md +++ b/docs/_pages/v2/installation.md @@ -49,4 +49,7 @@ Install your deep learning framework of preference in your environment. We have If you are using LiDAR, Open3D currently requires `torch==2.2*`. -And it's done! You can check the `examples` directory for inspiration and run some of the scripts provided either by activating the created environment using `poetry shell` or directly running `poetry run python examples/`. \ No newline at end of file +And it's done! You can check the `examples` directory for inspiration and run some of the scripts provided either by activating the created environment using `poetry shell` or directly running `poetry run python examples/`. + +### Additional environments +Some LiDAR segmentation models, such as SphereFormer and LSK3DNet, require a dedicated installation workflow. Refer to [additional_envs/INSTRUCTIONS.md](additional_envs/INSTRUCTIONS.md) for detailed setup instructions. 
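The JSON examples above all share the `model_format` key, which appears to be what the LiDAR wrappers use to pick the matching helper (Open3D-ML, mmdetection3d, SphereFormer or LSK3DNet). A hedged sketch of loading such a file; the filename is a placeholder:

```python
import json

with open("model_cfg.json", "r", encoding="utf-8") as f:   # hypothetical filename
    model_cfg = json.load(f)

print(model_cfg["model_format"])     # e.g. "o3d_randlanet" or "sphereformer"
print(model_cfg.get("n_feats", 4))   # 3 (XYZ) or 4 (XYZ + intensity)
```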
\ No newline at end of file diff --git a/docs/_pages/v2/usage.md b/docs/_pages/v2/usage.md index 689306a4..d2dcdc46 100644 --- a/docs/_pages/v2/usage.md +++ b/docs/_pages/v2/usage.md @@ -67,7 +67,7 @@ Usage: dm_evaluate [OPTIONS] {segmentation|detection} {image|lidar} Evaluate model on dataset Options: - --model_format [torch|tensorflow|tensorflow_explicit] + --model_format [torch|tensorflow] Trained model format [default: torch] --model PATH Trained model filename (TorchScript) or directory (TensorFlow SavedModel) diff --git a/docs/assets/images/detectionmetricsv2_diagram.png b/docs/assets/images/detectionmetricsv2_diagram.png index d85baea7..02929b62 100644 Binary files a/docs/assets/images/detectionmetricsv2_diagram.png and b/docs/assets/images/detectionmetricsv2_diagram.png differ diff --git a/examples/gaia_image.py b/examples/gaia_image.py index 1718aca1..4a8f3a88 100644 --- a/examples/gaia_image.py +++ b/examples/gaia_image.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset @@ -13,6 +14,16 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--dataset", type=str, required=True, help="Parquet dataset file" ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) parser.add_argument( "--outdir", type=str, @@ -41,11 +52,26 @@ def main(): """Main function""" args = parse_args() + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + dataset = GaiaImageSegmentationDataset(dataset_fname=args.dataset) if args.split: dataset.dataset = dataset.dataset[dataset.dataset["split"] == args.split] dataset.has_label_count = False - dataset.export(outdir=args.outdir, resize=args.resize) + + dataset.export( + outdir=args.outdir, + resize=args.resize, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/gaia_lidar.py b/examples/gaia_lidar.py index b5e1d8a3..4280bb36 100644 --- a/examples/gaia_lidar.py +++ b/examples/gaia_lidar.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.gaia import GaiaLiDARSegmentationDataset @@ -13,6 +14,23 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--dataset", type=str, required=True, help="Parquet dataset file" ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) + parser.add_argument( + "--outdir", + type=str, + required=True, + help="Directory where dataset will be stored in common format", + ) + return parser.parse_args() @@ -20,7 +38,22 @@ def main(): """Main function""" args = parse_args() - GaiaLiDARSegmentationDataset(dataset_fname=args.dataset) + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + + dataset = 
GaiaLiDARSegmentationDataset(dataset_fname=args.dataset) + + dataset.export( + args.outdir, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/goose_lidar.py b/examples/goose_lidar.py index 0ecc9693..3f860663 100644 --- a/examples/goose_lidar.py +++ b/examples/goose_lidar.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.goose import GOOSELiDARSegmentationDataset @@ -26,6 +27,16 @@ def parse_args() -> argparse.Namespace: type=str, help="Directory where test dataset split is stored", ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) parser.add_argument( "--outdir", type=str, @@ -40,12 +51,25 @@ def main(): """Main function""" args = parse_args() + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + dataset = GOOSELiDARSegmentationDataset( train_dataset_dir=args.train_dataset_dir, val_dataset_dir=args.val_dataset_dir, test_dataset_dir=args.test_dataset_dir, ) - dataset.export(args.outdir) + dataset.export( + args.outdir, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/merge_datasets.py b/examples/merge_datasets.py index 87ce0243..cf9201a3 100644 --- a/examples/merge_datasets.py +++ b/examples/merge_datasets.py @@ -1,6 +1,6 @@ import argparse -from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset +from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset, GaiaLiDARSegmentationDataset def parse_args() -> argparse.Namespace: @@ -23,6 +23,13 @@ def parse_args() -> argparse.Namespace: required=True, help="Directory where merged dataset will be stored", ) + parser.add_argument( + "--dataset_type", + type=str, + choices=["image", "lidar"], + required=True, + help="Type of datasets to merge", + ) return parser.parse_args() @@ -31,7 +38,14 @@ def main(): """Main function""" args = parse_args() - datasets = [GaiaImageSegmentationDataset(fname) for fname in args.datasets] + if args.dataset_type == "image": + dataset_class = GaiaImageSegmentationDataset + elif args.dataset_type == "lidar": + dataset_class = GaiaLiDARSegmentationDataset + else: + raise ValueError(f"Unknown dataset type: {args.dataset_type}") + + datasets = [dataset_class(fname) for fname in args.datasets] main_dataset = datasets[0] for extra_dataset in datasets[1:]: main_dataset.append(extra_dataset) diff --git a/examples/rellis3d_lidar.py b/examples/rellis3d_lidar.py index a5a1cc93..cb5bf48e 100644 --- a/examples/rellis3d_lidar.py +++ b/examples/rellis3d_lidar.py @@ -1,4 +1,5 @@ import argparse +import json from detectionmetrics.datasets.rellis3d import Rellis3DLiDARSegmentationDataset @@ -28,6 +29,16 @@ def parse_args() -> argparse.Namespace: required=True, help="YAML file containing dataset ontology", ) + parser.add_argument( + "--new_ontology", + type=str, + help="New ontology JSON file name", + ) + parser.add_argument( + "--ontology_translation", + type=str, + help="Ontology translation JSON file name", + ) parser.add_argument( "--outdir", type=str, @@ -42,12 +53,25 @@ def main(): """Main 
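The updated example scripts accept `--new_ontology` and `--ontology_translation` JSON files and forward them to `dataset.export`. A hedged sketch of what a minimal pair of files could look like; class names and indices are made up, and only the `idx` field is assumed from the converter code:

```python
import json

new_ontology = {
    "vegetation": {"idx": 0},
    "other": {"idx": 1},
}
ontology_translation = {"grass": "vegetation", "tree": "vegetation", "void": "other"}

with open("new_ontology.json", "w", encoding="utf-8") as f:
    json.dump(new_ontology, f, indent=2)
with open("ontology_translation.json", "w", encoding="utf-8") as f:
    json.dump(ontology_translation, f, indent=2)

# python examples/goose_lidar.py ... --new_ontology new_ontology.json \
#     --ontology_translation ontology_translation.json --outdir out/
```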
function""" args = parse_args() + new_ontology, ontology_translation = None, None + if args.new_ontology is not None: + with open(args.new_ontology, "r", encoding="utf-8") as f: + new_ontology = json.load(f) + + if args.ontology_translation is not None: + with open(args.ontology_translation, "r", encoding="utf-8") as f: + ontology_translation = json.load(f) + dataset = Rellis3DLiDARSegmentationDataset( dataset_dir=args.dataset_dir, split_dir=args.split_dir, ontology_fname=args.ontology_fname, ) - dataset.export(args.outdir) + dataset.export( + outdir=args.outdir, + new_ontology=new_ontology, + ontology_translation=ontology_translation, + ) if __name__ == "__main__": diff --git a/examples/store_image_video.py b/examples/store_image_video.py index 7e25d52e..20ab3694 100644 --- a/examples/store_image_video.py +++ b/examples/store_image_video.py @@ -104,7 +104,7 @@ def main(): if model is not None: image = Image.open(sample_data["image"]) - label = model.inference(image) + label = model.predict(image) lut = uc.ontology_to_rgb_lut(model.ontology) else: label = Image.open(sample_data["label"]) diff --git a/examples/store_lidar_video.py b/examples/store_lidar_video.py index e280eadf..eb66ee4e 100644 --- a/examples/store_lidar_video.py +++ b/examples/store_lidar_video.py @@ -112,10 +112,10 @@ def main(): point_cloud = dataset.read_points(sample_data["points"]) if model is not None: - label = model.inference(point_cloud) + label = model.predict(point_cloud) lut = uc.ontology_to_rgb_lut(model.ontology) else: - label, _ = dataset.read_label(sample_data["label"]) + label = dataset.read_label(sample_data["label"]) lut = uc.ontology_to_rgb_lut(dataset.ontology) colors = lut[label] / 255.0 diff --git a/examples/tensorflow_computational_cost.py b/examples/tensorflow_computational_cost.py index f6cef460..5930bee1 100644 --- a/examples/tensorflow_computational_cost.py +++ b/examples/tensorflow_computational_cost.py @@ -1,6 +1,6 @@ import argparse -from detectionmetrics.models.tensorflow import TensorflowImageSegmentationModel +from detectionmetrics.models.tf_segmentation import TensorflowImageSegmentationModel def parse_args() -> argparse.Namespace: diff --git a/examples/tensorflow_image.py b/examples/tensorflow_image.py index 058c2928..ee640360 100644 --- a/examples/tensorflow_image.py +++ b/examples/tensorflow_image.py @@ -4,7 +4,7 @@ from PIL import Image from detectionmetrics.datasets.gaia import GaiaImageSegmentationDataset -from detectionmetrics.models.tensorflow import TensorflowImageSegmentationModel +from detectionmetrics.models.tf_segmentation import TensorflowImageSegmentationModel import detectionmetrics.utils.conversion as uc @@ -73,7 +73,7 @@ def main(): if args.image is not None: image = Image.open(args.image).convert("RGB") - result = model.inference(image) + result = model.predict(image) result = uc.label_to_rgb(result, model.ontology) result.show() diff --git a/examples/torch_image.py b/examples/torch_image.py index aeca7cb1..6410136c 100644 --- a/examples/torch_image.py +++ b/examples/torch_image.py @@ -73,7 +73,7 @@ def main(): if args.image is not None: image = Image.open(args.image).convert("RGB") - result = model.inference(image) + result = model.predict(image) result = uc.label_to_rgb(result, model.ontology) result.show() diff --git a/examples/torch_lidar.py b/examples/torch_lidar.py index 0dfab360..e6351b64 100644 --- a/examples/torch_lidar.py +++ b/examples/torch_lidar.py @@ -57,6 +57,13 @@ def parse_args() -> argparse.Namespace: required=False, help="JSON file containing 
translation between dataset and model classes", ) + parser.add_argument( + "--translation_direction", + type=str, + choices=["dataset_to_model", "model_to_dataset"], + default="dataset_to_model", + help="Direction of the ontology translation", + ) parser.add_argument( "--predictions_outdir", type=str, @@ -75,16 +82,17 @@ def main(): dataset = GaiaLiDARSegmentationDataset(args.dataset) if args.point_cloud is not None: - point_cloud = dataset.read_points(args.point_cloud) - result = model.inference(point_cloud) + result = model.predict(args.point_cloud) lut = uc.ontology_to_rgb_lut(model.ontology) colors = lut[result] / 255.0 + point_cloud = dataset.read_points(args.point_cloud) ul.view_point_cloud(point_cloud[:, :3], colors) results = model.eval( dataset, split=args.split, ontology_translation=args.ontology_translation, + translation_direction=args.translation_direction, predictions_outdir=args.predictions_outdir, results_per_sample=args.predictions_outdir is not None, ) diff --git a/examples/torch_native_image.py b/examples/torch_native_image.py index ab590098..f74c64c7 100644 --- a/examples/torch_native_image.py +++ b/examples/torch_native_image.py @@ -79,7 +79,7 @@ def main(): if args.image is not None: image = Image.open(args.image).convert("RGB") - result = model.inference(image) + result = model.predict(image) result = uc.label_to_rgb(result, model.ontology) result.show() diff --git a/examples/tutorial_image_segmentation.ipynb b/examples/tutorial_image_segmentation.ipynb index a170caae..274eb651 100644 --- a/examples/tutorial_image_segmentation.ipynb +++ b/examples/tutorial_image_segmentation.ipynb @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -158,7 +158,7 @@ "label = Image.open(label_fname)\n", "label = uc.label_to_rgb(label, dataset.ontology)\n", "\n", - "pred = model.inference(image)\n", + "pred = model.predict(image)\n", "pred = uc.label_to_rgb(pred, model.ontology)\n", "pred = pred.resize(label.size)\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index cfe250bd..3251c6c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ license = "LICENSE" [tool.poetry.dependencies] python = "^3.10" -tqdm = "^4.67.0" +tqdm = "^4.65.0" pandas = "^2.2.3" PyYAML = "^6.0.2" pyarrow = "^18.0.0" @@ -18,7 +18,7 @@ opencv-python-headless = "^4.10.0.84" scikit-learn = "^1.6.0" open3d = "^0.19.0" addict = "^2.4.0" -matplotlib = "^3.10.0" +matplotlib = "^3.6.0" click = "^8.1.8" tensorboard = "^2.18.0" pycocotools = { version = "^2.0.7", markers = "sys_platform != 'win32'" }