ThunderVVV committed on
Commit b7eedf7 · 1 Parent(s): 9480700

add thirdparty

This view is limited to 50 files because it contains too many changes. See the raw diff.
Files changed (50)
  1. .gitattributes +4 -0
  2. .gitignore +73 -0
  3. app.py +1 -1
  4. thirdparty/DROID-SLAM/.gitignore +158 -0
  5. thirdparty/DROID-SLAM/.gitmodules +6 -0
  6. thirdparty/DROID-SLAM/LICENSE +29 -0
  7. thirdparty/DROID-SLAM/README.md +139 -0
  8. thirdparty/DROID-SLAM/demo.py +135 -0
  9. thirdparty/DROID-SLAM/droid_slam/data_readers/__init__.py +1 -0
  10. thirdparty/DROID-SLAM/droid_slam/data_readers/augmentation.py +58 -0
  11. thirdparty/DROID-SLAM/droid_slam/data_readers/base.py +157 -0
  12. thirdparty/DROID-SLAM/droid_slam/data_readers/factory.py +82 -0
  13. thirdparty/DROID-SLAM/droid_slam/data_readers/rgbd_utils.py +190 -0
  14. thirdparty/DROID-SLAM/droid_slam/data_readers/stream.py +234 -0
  15. thirdparty/DROID-SLAM/droid_slam/data_readers/tartan.py +138 -0
  16. thirdparty/DROID-SLAM/droid_slam/data_readers/tartan_test.txt +32 -0
  17. thirdparty/DROID-SLAM/droid_slam/depth_video.py +197 -0
  18. thirdparty/DROID-SLAM/droid_slam/droid.py +102 -0
  19. thirdparty/DROID-SLAM/droid_slam/droid_backend.py +52 -0
  20. thirdparty/DROID-SLAM/droid_slam/droid_frontend.py +119 -0
  21. thirdparty/DROID-SLAM/droid_slam/droid_net.py +226 -0
  22. thirdparty/DROID-SLAM/droid_slam/factor_graph.py +397 -0
  23. thirdparty/DROID-SLAM/droid_slam/geom/__init__.py +0 -0
  24. thirdparty/DROID-SLAM/droid_slam/geom/ba.py +158 -0
  25. thirdparty/DROID-SLAM/droid_slam/geom/chol.py +73 -0
  26. thirdparty/DROID-SLAM/droid_slam/geom/graph_utils.py +113 -0
  27. thirdparty/DROID-SLAM/droid_slam/geom/losses.py +118 -0
  28. thirdparty/DROID-SLAM/droid_slam/geom/projective_ops.py +139 -0
  29. thirdparty/DROID-SLAM/droid_slam/logger.py +54 -0
  30. thirdparty/DROID-SLAM/droid_slam/modules/__init__.py +0 -0
  31. thirdparty/DROID-SLAM/droid_slam/modules/clipping.py +24 -0
  32. thirdparty/DROID-SLAM/droid_slam/modules/corr.py +140 -0
  33. thirdparty/DROID-SLAM/droid_slam/modules/extractor.py +198 -0
  34. thirdparty/DROID-SLAM/droid_slam/modules/gru.py +34 -0
  35. thirdparty/DROID-SLAM/droid_slam/motion_filter.py +92 -0
  36. thirdparty/DROID-SLAM/droid_slam/trajectory_filler.py +112 -0
  37. thirdparty/DROID-SLAM/droid_slam/vis_headless.py +185 -0
  38. thirdparty/DROID-SLAM/droid_slam/visualization.py +189 -0
  39. thirdparty/DROID-SLAM/environment.yaml +22 -0
  40. thirdparty/DROID-SLAM/environment_novis.yaml +20 -0
  41. thirdparty/DROID-SLAM/evaluation_scripts/test_eth3d.py +134 -0
  42. thirdparty/DROID-SLAM/evaluation_scripts/test_euroc.py +142 -0
  43. thirdparty/DROID-SLAM/evaluation_scripts/test_tum.py +123 -0
  44. thirdparty/DROID-SLAM/evaluation_scripts/validate_tartanair.py +115 -0
  45. thirdparty/DROID-SLAM/misc/DROID.png +3 -0
  46. thirdparty/DROID-SLAM/misc/renderoption.json +40 -0
  47. thirdparty/DROID-SLAM/misc/screenshot.png +3 -0
  48. thirdparty/DROID-SLAM/setup.py +61 -0
  49. thirdparty/DROID-SLAM/src/altcorr_kernel.cu +356 -0
  50. thirdparty/DROID-SLAM/src/correlation_kernels.cu +185 -0
.gitattributes CHANGED
@@ -35,3 +35,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.mp4 filter=lfs diff=lfs merge=lfs -text
37
  *.png filter=lfs diff=lfs merge=lfs -text
38
+ thirdparty/Metric3D/media/gifs/demo_1.gif filter=lfs diff=lfs merge=lfs -text
39
+ thirdparty/Metric3D/training/kitti_json_files/eigen_train.json filter=lfs diff=lfs merge=lfs -text
40
+ *.gif filter=lfs diff=lfs merge=lfs -text
41
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,73 @@
1
+ # Project specific data and submodule
2
+
3
+ /example/video_0
4
+ /weights
5
+
6
+ .vscode/
7
+ **/.DS_Store
8
+ data/pretrain/*.pth
9
+ data/pretrain/*.pth.tar
10
+ data/smpl/SMPL_*.pkl
11
+ *.mov
12
+ example_video/
13
+ /thirdparty/detection
14
+ experiments/
15
+ logs/
16
+ hot3d_*/
17
+ File/
18
+ thirdparty/ZoeDepth
19
+ eval_vis_mdslam/
20
+ eval_vis_*/
21
+ pred_vis/
22
+ _DATA.zip
23
+ train_ddp_process*
24
+ logs*
25
+ /*_trainset_export/
26
+ /*.png
27
+ /*.zip
28
+ /dataset_tars/
29
+ /dataset_untars/
30
+ /datasets/
31
+ /eval_log*/
32
+ *.pth
33
+ *.pkl
34
+ /dataset*/
35
+ /eval*/
36
+ /thirdparty/aitviewer
37
+
38
+ # Byte-compiled / optimized / DLL files
39
+ __pycache__/
40
+
41
+ # Distribution / packaging
42
+ .Python
43
+ build/
44
+ develop-eggs/
45
+ dist/
46
+ downloads/
47
+ eggs/
48
+ .eggs/
49
+ lib64/
50
+ parts/
51
+ sdist/
52
+ var/
53
+ wheels/
54
+ pip-wheel-metadata/
55
+ share/python-wheels/
56
+ *.egg-info/
57
+ .installed.cfg
58
+ *.egg
59
+ MANIFEST
60
+
61
+ # Jupyter Notebook
62
+ .ipynb_checkpoints
63
+ *.ipynb
64
+
65
+ # IPython
66
+ profile_default/
67
+ ipython_config.py
68
+
69
+ # pyenv
70
+ .python-version
71
+
72
+ vis.mp4
73
+ imgui.ini
app.py CHANGED
@@ -121,7 +121,7 @@ header = ('''
121
  <a href="" target="_blank" rel="noopener noreferrer">Jinglei Zhang</a><sup>1</sup>,
122
  <a href="https://jiankangdeng.github.io/" target="_blank" rel="noopener noreferrer">Jiankang Deng</a><sup>2</sup>,
123
  <br>
124
- <a href="https://scholar.google.com/citations?user=syoPhv8AAAAJ&hl=en" target="_blank" rel="noopener noreferrer">Chao Ma</a><sup>1</sup>
124
+ <a href="https://scholar.google.com/citations?user=syoPhv8AAAAJ&hl=en" target="_blank" rel="noopener noreferrer">Chao Ma</a><sup>1</sup>,
125
  <a href="https://rolpotamias.github.io" target="_blank" rel="noopener noreferrer">Rolandos Alexandros Potamias</a><sup>2</sup>
126
  </h3>
127
  <h3>
thirdparty/DROID-SLAM/.gitignore ADDED
@@ -0,0 +1,158 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
133
+
134
+ # pytype static type analyzer
135
+ .pytype/
136
+
137
+ # Cython debug symbols
138
+ cython_debug/
139
+
140
+
141
+
142
+ __pycache__
143
+ build
144
+ dist
145
+ *.egg-info
146
+ *.vscode/
147
+ *.pth
148
+ tests
149
+ checkpoints
150
+ datasets
151
+ runs
152
+ cache
153
+ *.out
154
+ *.o
155
+ data
156
+ figures/*.pdf
157
+
158
+
thirdparty/DROID-SLAM/.gitmodules ADDED
@@ -0,0 +1,6 @@
1
+ [submodule "thirdparty/lietorch"]
2
+ path = thirdparty/lietorch
3
+ url = https://github.com/princeton-vl/lietorch
4
+ [submodule "thirdparty/eigen"]
5
+ path = thirdparty/eigen
6
+ url = https://gitlab.com/libeigen/eigen.git
thirdparty/DROID-SLAM/LICENSE ADDED
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2021, Princeton Vision & Learning Lab
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
thirdparty/DROID-SLAM/README.md ADDED
@@ -0,0 +1,139 @@
1
+ # DROID-SLAM
2
+
3
+
4
+ <!-- <center><img src="misc/DROID.png" width="640" style="center"></center> -->
5
+
6
+
7
+ [![IMAGE ALT TEXT HERE](misc/screenshot.png)](https://www.youtube.com/watch?v=GG78CSlSHSA)
8
+
9
+
10
+
11
+ [DROID-SLAM: Deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras](https://arxiv.org/abs/2108.10869)
12
+ Zachary Teed and Jia Deng
13
+
14
+ ```
15
+ @article{teed2021droid,
16
+ title={{DROID-SLAM: Deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras}},
17
+ author={Teed, Zachary and Deng, Jia},
18
+ journal={Advances in neural information processing systems},
19
+ year={2021}
20
+ }
21
+ ```
22
+
23
+ **Initial Code Release:** This repo currently provides a single-GPU implementation of our monocular, stereo, and RGB-D SLAM systems, along with demo, training, and evaluation scripts.
24
+
25
+
26
+ ## Requirements
27
+
28
+ To run the code you will need:
29
+ * **Inference:** Running the demos will require a GPU with at least 11G of memory.
30
+
31
+ * **Training:** Training requires a GPU with at least 24G of memory. We train on 4 x RTX-3090 GPUs.
32
+
33
+ ## Getting Started
34
+ 1. Clone the repo using the `--recursive` flag
35
+ ```Bash
36
+ git clone --recursive https://github.com/princeton-vl/DROID-SLAM.git
37
+ ```
38
+
39
+ 2. Create a new anaconda environment using the provided .yaml file. Use `environment_novis.yaml` if you do not want to use the visualization.
40
+ ```Bash
41
+ conda env create -f environment.yaml
42
+ pip install evo --upgrade --no-binary evo
43
+ pip install gdown
44
+ ```
45
+
46
+ 3. Compile the extensions (takes about 10 minutes)
47
+ ```Bash
48
+ python setup.py install
49
+ ```
50
+
51
+
52
+ ## Demos
53
+
54
+ 1. Download the model from Google Drive: [droid.pth](https://drive.google.com/file/d/1PpqVt1H4maBa_GbPJp4NwxRsd9jk-elh/view?usp=sharing)
55
+
56
+ 2. Download some sample videos using the provided script.
57
+ ```Bash
58
+ ./tools/download_sample_data.sh
59
+ ```
60
+
61
+ Run the demo on any of the samples (all demos can be run on a GPU with 11G of memory). While running, press the "s" key to increase the filtering threshold (= more points) and "a" to decrease the filtering threshold (= fewer points). To save the reconstruction with full resolution depth maps use the `--reconstruction_path` flag.
62
+
63
+
64
+ ```Python
65
+ python demo.py --imagedir=data/abandonedfactory --calib=calib/tartan.txt --stride=2
66
+ ```
67
+
68
+ ```Python
69
+ python demo.py --imagedir=data/sfm_bench/rgb --calib=calib/eth.txt
70
+ ```
71
+
72
+ ```Python
73
+ python demo.py --imagedir=data/Barn --calib=calib/barn.txt --stride=1 --backend_nms=4
74
+ ```
75
+
76
+ ```Python
77
+ python demo.py --imagedir=data/mav0/cam0/data --calib=calib/euroc.txt --t0=150
78
+ ```
79
+
80
+ ```Python
81
+ python demo.py --imagedir=data/rgbd_dataset_freiburg3_cabinet/rgb --calib=calib/tum3.txt
82
+ ```
83
+
84
+
85
+ **Running on your own data:** All you need is a calibration file. Calibration files are in the form
86
+ ```
87
+ fx fy cx cy [k1 k2 p1 p2 [ k3 [ k4 k5 k6 ]]]
88
+ ```
89
+ with parameters in brackets optional.
90
+
91
+ ## Evaluation
92
+ We provide evaluation scripts for TartanAir, EuRoC, and TUM. EuRoC and TUM can be run on a 1080Ti. The TartanAir and ETH3D evaluations will require 24G of memory.
93
+
94
+ ### TartanAir (Mono + Stereo)
95
+ Download the [TartanAir](https://theairlab.org/tartanair-dataset/) dataset using the script `thirdparty/tartanair_tools/download_training.py` and put them in `datasets/TartanAir`
96
+ ```Bash
97
+ ./tools/validate_tartanair.sh --plot_curve # monocular eval
98
+ ./tools/validate_tartanair.sh --plot_curve --stereo # stereo eval
99
+ ```
100
+
101
+ ### EuRoC (Mono + Stereo)
102
+ Download the [EuRoC](https://projects.asl.ethz.ch/datasets/doku.php?id=kmavvisualinertialdatasets) sequences (ASL format) and put them in `datasets/EuRoC`
103
+ ```Bash
104
+ ./tools/evaluate_euroc.sh # monocular eval
105
+ ./tools/evaluate_euroc.sh --stereo # stereo eval
106
+ ```
107
+
108
+ ### TUM-RGBD (Mono)
109
+ Download the fr1 sequences from [TUM-RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) and put them in `datasets/TUM-RGBD`
110
+ ```Bash
111
+ ./tools/evaluate_tum.sh # monocular eval
112
+ ```
113
+
114
+ ### ETH3D (RGB-D)
115
+ Download the [ETH3D](https://www.eth3d.net/slam_datasets) dataset
116
+ ```Bash
117
+ ./tools/evaluate_eth3d.sh # RGB-D eval
118
+ ```
119
+
120
+ ## Training
121
+
122
+ First download the TartanAir dataset. The download script can be found in `thirdparty/tartanair_tools/download_training.py`. You will only need the `rgb` and `depth` data.
123
+
124
+ ```
125
+ python download_training.py --rgb --depth
126
+ ```
127
+
128
+ You can then run the training script. We use 4 x RTX-3090 GPUs for training, which takes approximately 1 week. If you use a different number of GPUs, adjust the learning rate accordingly.
129
+
130
+ **Note:** On the first training run, covisibility is computed between all pairs of frames. This can take several hours, but the results are cached so that future training runs will start immediately.
131
+
132
+
133
+ ```
134
+ python train.py --datapath=<path to tartanair> --gpus=4 --lr=0.00025
135
+ ```
136
+
137
+
138
+ ## Acknowledgements
139
+ Data from [TartanAir](https://theairlab.org/tartanair-dataset/) was used to train our model. We additionally use evaluation tools from [evo](https://github.com/MichaelGrupp/evo) and [tartanair_tools](https://github.com/castacks/tartanair_tools).
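As a companion to the calibration format above, here is a minimal parsing sketch (not one of the committed files): it turns an `fx fy cx cy [k1 k2 p1 p2 [k3 ...]]` line into a 3x3 intrinsics matrix plus an optional distortion vector, mirroring how `image_stream` in `demo.py` below consumes `calib[:4]` and `calib[4:]`. The helper name and the example path are illustrative only.

```python
# Illustrative sketch (not one of the committed files): parse a DROID-SLAM calibration
# file of the form "fx fy cx cy [k1 k2 p1 p2 [k3 ...]]" into a 3x3 intrinsics matrix K
# and an optional distortion vector, as image_stream() in demo.py expects.
import numpy as np

def load_calib(path):
    calib = np.loadtxt(path, delimiter=" ")       # one line of whitespace-separated floats
    fx, fy, cx, cy = calib[:4]
    K = np.eye(3)
    K[0, 0], K[1, 1] = fx, fy                     # focal lengths
    K[0, 2], K[1, 2] = cx, cy                     # principal point
    dist = calib[4:] if calib.size > 4 else None  # distortion coefficients are optional
    return K, dist

# K, dist = load_calib("calib/tartan.txt")        # path taken from the demo commands above
```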
thirdparty/DROID-SLAM/demo.py ADDED
@@ -0,0 +1,135 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ from torch.multiprocessing import Process
15
+ from droid import Droid
16
+
17
+ from pycocotools import mask as masktool
18
+ import torch.nn.functional as F
19
+
20
+
21
+ def show_image(image):
22
+ image = image.permute(1, 2, 0).cpu().numpy()
23
+ cv2.imshow('image', image / 255.0)
24
+ cv2.waitKey(1)
25
+
26
+ def image_stream(imagedir, calib, stride):
27
+ """ image generator """
28
+ # calib = np.loadtxt(calib, delimiter=" ")
29
+ fx, fy, cx, cy = calib[:4]
30
+
31
+ K = np.eye(3)
32
+ K[0,0] = fx
33
+ K[0,2] = cx
34
+ K[1,1] = fy
35
+ K[1,2] = cy
36
+
37
+ image_list = sorted(glob.glob(f'{imagedir}/*.jpg'))
38
+ image_list = image_list[::stride]
39
+
40
+ for t, imfile in enumerate(image_list):
41
+ image = cv2.imread(imfile)
42
+ if len(calib) > 4:
43
+ image = cv2.undistort(image, K, calib[4:])
44
+
45
+ h0, w0, _ = image.shape
46
+ h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
47
+ w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
48
+
49
+ image = cv2.resize(image, (w1, h1))
50
+ image = image[:h1-h1%8, :w1-w1%8]
51
+ image = torch.as_tensor(image).permute(2, 0, 1)
52
+
53
+ intrinsics = torch.as_tensor([fx, fy, cx, cy])
54
+ intrinsics[0::2] *= (w1 / w0)
55
+ intrinsics[1::2] *= (h1 / h0)
56
+
57
+ yield t, image[None], intrinsics
58
+
59
+
60
+ def save_reconstruction(droid, reconstruction_path):
61
+
62
+ from pathlib import Path
63
+ import random
64
+ import string
65
+
66
+ t = droid.video.counter.value
67
+ tstamps = droid.video.tstamp[:t].cpu().numpy()
68
+ images = droid.video.images[:t].cpu().numpy()
69
+ disps = droid.video.disps_up[:t].cpu().numpy()
70
+ poses = droid.video.poses[:t].cpu().numpy()
71
+ intrinsics = droid.video.intrinsics[:t].cpu().numpy()
72
+
73
+ Path("reconstructions/{}".format(reconstruction_path)).mkdir(parents=True, exist_ok=True)
74
+ np.save("reconstructions/{}/tstamps.npy".format(reconstruction_path), tstamps)
75
+ np.save("reconstructions/{}/images.npy".format(reconstruction_path), images)
76
+ np.save("reconstructions/{}/disps.npy".format(reconstruction_path), disps)
77
+ np.save("reconstructions/{}/poses.npy".format(reconstruction_path), poses)
78
+ np.save("reconstructions/{}/intrinsics.npy".format(reconstruction_path), intrinsics)
79
+
80
+
81
+ if __name__ == '__main__':
82
+ parser = argparse.ArgumentParser()
83
+ parser.add_argument("--imagedir", type=str, help="path to image directory")
84
+ parser.add_argument("--calib", type=str, help="path to calibration file")
85
+ parser.add_argument("--t0", default=0, type=int, help="starting frame")
86
+ parser.add_argument("--stride", default=3, type=int, help="frame stride")
87
+
88
+ parser.add_argument("--weights", default="droid.pth")
89
+ parser.add_argument("--buffer", type=int, default=512)
90
+ parser.add_argument("--image_size", default=[240, 320])
91
+ parser.add_argument("--disable_vis", action="store_true")
92
+
93
+ parser.add_argument("--beta", type=float, default=0.3, help="weight for translation / rotation components of flow")
94
+ parser.add_argument("--filter_thresh", type=float, default=2.4, help="how much motion before considering new keyframe")
95
+ parser.add_argument("--warmup", type=int, default=8, help="number of warmup frames")
96
+ parser.add_argument("--keyframe_thresh", type=float, default=4.0, help="threshold to create a new keyframe")
97
+ parser.add_argument("--frontend_thresh", type=float, default=16.0, help="add edges between frames whithin this distance")
98
+ parser.add_argument("--frontend_window", type=int, default=25, help="frontend optimization window")
99
+ parser.add_argument("--frontend_radius", type=int, default=2, help="force edges between frames within radius")
100
+ parser.add_argument("--frontend_nms", type=int, default=1, help="non-maximal supression of edges")
101
+
102
+ parser.add_argument("--backend_thresh", type=float, default=22.0)
103
+ parser.add_argument("--backend_radius", type=int, default=2)
104
+ parser.add_argument("--backend_nms", type=int, default=3)
105
+ parser.add_argument("--upsample", action="store_true")
106
+ parser.add_argument("--reconstruction_path", help="path to saved reconstruction")
107
+ args = parser.parse_args()
108
+
109
+ args.stereo = False
110
+ torch.multiprocessing.set_start_method('spawn')
111
+
112
+ droid = None
113
+
114
+ # need high resolution depths
115
+ if args.reconstruction_path is not None:
116
+ args.upsample = True
117
+
118
+ tstamps = []
119
+ for (t, image, intrinsics) in tqdm(image_stream(args.imagedir, args.calib, args.stride)):
120
+ if t < args.t0:
121
+ continue
122
+
123
+ if not args.disable_vis:
124
+ show_image(image[0])
125
+
126
+ if droid is None:
127
+ args.image_size = [image.shape[2], image.shape[3]]
128
+ droid = Droid(args)
129
+
130
+ droid.track(t, image, intrinsics=intrinsics)
131
+
132
+ if args.reconstruction_path is not None:
133
+ save_reconstruction(droid, args.reconstruction_path)
134
+
135
+ traj_est = droid.terminate(image_stream(args.imagedir, args.calib, args.stride))
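`save_reconstruction` in `demo.py` above writes five NumPy arrays under `reconstructions/<name>/`. A minimal sketch for loading them back (not one of the committed files); the array names come from the `np.save` calls, while reading `poses` as `tx ty tz qx qy qz qw` 7-vectors follows the lietorch SE3 convention used elsewhere in this repo and is an assumption here.

```python
# Illustrative sketch (not one of the committed files): reload the arrays written by
# save_reconstruction() in demo.py. File names match the np.save calls above.
import numpy as np

def load_reconstruction(name):
    root = f"reconstructions/{name}"
    keys = ("tstamps", "images", "disps", "poses", "intrinsics")
    return {k: np.load(f"{root}/{k}.npy") for k in keys}

# rec = load_reconstruction("my_scene")
# rec["disps"]  -> (num_keyframes, H, W) upsampled disparities (--upsample is forced on)
# rec["poses"]  -> (num_keyframes, 7), assumed tx ty tz qx qy qz qw
```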
thirdparty/DROID-SLAM/droid_slam/data_readers/__init__.py ADDED
@@ -0,0 +1 @@
1
+
thirdparty/DROID-SLAM/droid_slam/data_readers/augmentation.py ADDED
@@ -0,0 +1,58 @@
1
+ import torch
2
+ import torchvision.transforms as transforms
3
+ import numpy as np
4
+ import torch.nn.functional as F
5
+
6
+
7
+ class RGBDAugmentor:
8
+ """ perform augmentation on RGB-D video """
9
+
10
+ def __init__(self, crop_size):
11
+ self.crop_size = crop_size
12
+ self.augcolor = transforms.Compose([
13
+ transforms.ToPILImage(),
14
+ transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.4/3.14),
15
+ transforms.RandomGrayscale(p=0.1),
16
+ transforms.ToTensor()])
17
+
18
+ self.max_scale = 0.25
19
+
20
+ def spatial_transform(self, images, depths, poses, intrinsics):
21
+ """ cropping and resizing """
22
+ ht, wd = images.shape[2:]
23
+
24
+ max_scale = self.max_scale
25
+ min_scale = np.log2(np.maximum(
26
+ (self.crop_size[0] + 1) / float(ht),
27
+ (self.crop_size[1] + 1) / float(wd)))
28
+
29
+ scale = 2 ** np.random.uniform(min_scale, max_scale)
30
+ intrinsics = scale * intrinsics
31
+ depths = depths.unsqueeze(dim=1)
32
+
33
+ images = F.interpolate(images, scale_factor=scale, mode='bilinear',
34
+ align_corners=False, recompute_scale_factor=True)
35
+
36
+ depths = F.interpolate(depths, scale_factor=scale, recompute_scale_factor=True)
37
+
38
+ # always perform center crop (TODO: try non-center crops)
39
+ y0 = (images.shape[2] - self.crop_size[0]) // 2
40
+ x0 = (images.shape[3] - self.crop_size[1]) // 2
41
+
42
+ intrinsics = intrinsics - torch.tensor([0.0, 0.0, x0, y0])
43
+ images = images[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
44
+ depths = depths[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
45
+
46
+ depths = depths.squeeze(dim=1)
47
+ return images, poses, depths, intrinsics
48
+
49
+ def color_transform(self, images):
50
+ """ color jittering """
51
+ num, ch, ht, wd = images.shape
52
+ images = images.permute(1, 2, 3, 0).reshape(ch, ht, wd*num)
53
+ images = 255 * self.augcolor(images[[2,1,0]] / 255.0)
54
+ return images[[2,1,0]].reshape(ch, ht, wd, num).permute(3,0,1,2).contiguous()
55
+
56
+ def __call__(self, images, poses, depths, intrinsics):
57
+ images = self.color_transform(images)
58
+ return self.spatial_transform(images, depths, poses, intrinsics)
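A small usage sketch for the `RGBDAugmentor` above (not one of the committed files). Shapes follow the class itself: images `(N, 3, H, W)`, depths `(N, H, W)`, intrinsics `(N, 4)` as `fx fy cx cy`; poses pass through unchanged, so the 7-vector shape is only carried over from the rest of the repo. Importing `augmentation` directly assumes `data_readers/` is on `sys.path`.

```python
# Illustrative sketch (not one of the committed files): run RGBDAugmentor on dummy data.
import torch
from augmentation import RGBDAugmentor   # assumes data_readers/ is on sys.path

aug = RGBDAugmentor(crop_size=[384, 512])

images = torch.randint(0, 255, (2, 3, 480, 640)).float()   # (N, 3, H, W), BGR in 0-255
depths = torch.rand(2, 480, 640) + 0.5                      # (N, H, W), strictly positive
poses = torch.zeros(2, 7); poses[:, 6] = 1.0                # identity tx ty tz qx qy qz qw (assumed layout)
intrinsics = torch.tensor([[320.0, 320.0, 320.0, 240.0]] * 2)

# __call__ returns (images, poses, depths, intrinsics) after jitter, rescale and center crop
images, poses, depths, intrinsics = aug(images, poses, depths, intrinsics)
print(images.shape)   # torch.Size([2, 3, 384, 512])
```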
thirdparty/DROID-SLAM/droid_slam/data_readers/base.py ADDED
@@ -0,0 +1,157 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import torch.nn.functional as F
6
+
7
+ import csv
8
+ import os
9
+ import cv2
10
+ import math
11
+ import random
12
+ import json
13
+ import pickle
14
+ import os.path as osp
15
+
16
+ from .augmentation import RGBDAugmentor
17
+ from .rgbd_utils import *
18
+
19
+ class RGBDDataset(data.Dataset):
20
+ def __init__(self, name, datapath, n_frames=4, crop_size=[384,512], fmin=8.0, fmax=75.0, do_aug=True):
21
+ """ Base class for RGBD dataset """
22
+ self.aug = None
23
+ self.root = datapath
24
+ self.name = name
25
+
26
+ self.n_frames = n_frames
27
+ self.fmin = fmin # exclude very easy examples
28
+ self.fmax = fmax # exclude very hard examples
29
+
30
+ if do_aug:
31
+ self.aug = RGBDAugmentor(crop_size=crop_size)
32
+
33
+ # building dataset is expensive, cache so only needs to be performed once
34
+ cur_path = osp.dirname(osp.abspath(__file__))
35
+ if not os.path.isdir(osp.join(cur_path, 'cache')):
36
+ os.mkdir(osp.join(cur_path, 'cache'))
37
+
38
+ cache_path = osp.join(cur_path, 'cache', '{}.pickle'.format(self.name))
39
+
40
+ if osp.isfile(cache_path):
41
+ scene_info = pickle.load(open(cache_path, 'rb'))[0]
42
+ else:
43
+ scene_info = self._build_dataset()
44
+ with open(cache_path, 'wb') as cachefile:
45
+ pickle.dump((scene_info,), cachefile)
46
+
47
+ self.scene_info = scene_info
48
+ self._build_dataset_index()
49
+
50
+ def _build_dataset_index(self):
51
+ self.dataset_index = []
52
+ for scene in self.scene_info:
53
+ if not self.__class__.is_test_scene(scene):
54
+ graph = self.scene_info[scene]['graph']
55
+ for i in graph:
56
+ if len(graph[i][0]) > self.n_frames:
57
+ self.dataset_index.append((scene, i))
58
+ else:
59
+ print("Reserving {} for validation".format(scene))
60
+
61
+ @staticmethod
62
+ def image_read(image_file):
63
+ return cv2.imread(image_file)
64
+
65
+ @staticmethod
66
+ def depth_read(depth_file):
67
+ return np.load(depth_file)
68
+
69
+ def build_frame_graph(self, poses, depths, intrinsics, f=16, max_flow=256):
70
+ """ compute optical flow distance between all pairs of frames """
71
+ def read_disp(fn):
72
+ depth = self.__class__.depth_read(fn)[f//2::f, f//2::f]
73
+ depth[depth < 0.01] = np.mean(depth)
74
+ return 1.0 / depth
75
+
76
+ poses = np.array(poses)
77
+ intrinsics = np.array(intrinsics) / f
78
+
79
+ disps = np.stack(list(map(read_disp, depths)), 0)
80
+ d = f * compute_distance_matrix_flow(poses, disps, intrinsics)
81
+
82
+ # uncomment for nice visualization
83
+ # import matplotlib.pyplot as plt
84
+ # plt.imshow(d)
85
+ # plt.show()
86
+
87
+ graph = {}
88
+ for i in range(d.shape[0]):
89
+ j, = np.where(d[i] < max_flow)
90
+ graph[i] = (j, d[i,j])
91
+
92
+ return graph
93
+
94
+ def __getitem__(self, index):
95
+ """ return training video """
96
+
97
+ index = index % len(self.dataset_index)
98
+ scene_id, ix = self.dataset_index[index]
99
+
100
+ frame_graph = self.scene_info[scene_id]['graph']
101
+ images_list = self.scene_info[scene_id]['images']
102
+ depths_list = self.scene_info[scene_id]['depths']
103
+ poses_list = self.scene_info[scene_id]['poses']
104
+ intrinsics_list = self.scene_info[scene_id]['intrinsics']
105
+
106
+ inds = [ ix ]
107
+ while len(inds) < self.n_frames:
108
+ # get other frames within flow threshold
109
+ k = (frame_graph[ix][1] > self.fmin) & (frame_graph[ix][1] < self.fmax)
110
+ frames = frame_graph[ix][0][k]
111
+
112
+ # prefer frames forward in time
113
+ if np.count_nonzero(frames[frames > ix]):
114
+ ix = np.random.choice(frames[frames > ix])
115
+
116
+ elif np.count_nonzero(frames):
117
+ ix = np.random.choice(frames)
118
+
119
+ inds += [ ix ]
120
+
121
+ images, depths, poses, intrinsics = [], [], [], []
122
+ for i in inds:
123
+ images.append(self.__class__.image_read(images_list[i]))
124
+ depths.append(self.__class__.depth_read(depths_list[i]))
125
+ poses.append(poses_list[i])
126
+ intrinsics.append(intrinsics_list[i])
127
+
128
+ images = np.stack(images).astype(np.float32)
129
+ depths = np.stack(depths).astype(np.float32)
130
+ poses = np.stack(poses).astype(np.float32)
131
+ intrinsics = np.stack(intrinsics).astype(np.float32)
132
+
133
+ images = torch.from_numpy(images).float()
134
+ images = images.permute(0, 3, 1, 2)
135
+
136
+ disps = torch.from_numpy(1.0 / depths)
137
+ poses = torch.from_numpy(poses)
138
+ intrinsics = torch.from_numpy(intrinsics)
139
+
140
+ if self.aug is not None:
141
+ images, poses, disps, intrinsics = \
142
+ self.aug(images, poses, disps, intrinsics)
143
+
144
+ # scale scene
145
+ if len(disps[disps>0.01]) > 0:
146
+ s = disps[disps>0.01].mean()
147
+ disps = disps / s
148
+ poses[...,:3] *= s
149
+
150
+ return images, poses, disps, intrinsics
151
+
152
+ def __len__(self):
153
+ return len(self.dataset_index)
154
+
155
+ def __imul__(self, x):
156
+ self.dataset_index *= x
157
+ return self
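A toy illustration (not one of the committed files) of the frame graph consumed by `__getitem__` above: `build_frame_graph` maps each frame index to (co-visible frame indices, mean flow magnitudes), and sampling keeps only neighbours whose flow lies between `fmin` and `fmax`, i.e. neither very easy nor very hard. The numbers below are invented.

```python
# Illustrative sketch (not one of the committed files): how fmin/fmax filter the frame graph.
import numpy as np

graph = {0: (np.array([1, 2, 3]), np.array([5.0, 20.0, 90.0]))}  # made-up flow magnitudes
fmin, fmax = 8.0, 75.0                                           # defaults in RGBDDataset

frames, flow = graph[0]
keep = (flow > fmin) & (flow < fmax)
print(frames[keep])   # [2] -- frame 1 is too easy (flow 5), frame 3 too hard (flow 90)
```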
thirdparty/DROID-SLAM/droid_slam/data_readers/factory.py ADDED
@@ -0,0 +1,82 @@
1
+
2
+ import pickle
3
+ import os
4
+ import os.path as osp
5
+
6
+ # RGBD-Dataset
7
+ from .tartan import TartanAir
8
+
9
+ from .stream import ImageStream
10
+ from .stream import StereoStream
11
+ from .stream import RGBDStream
12
+
13
+ # streaming datasets for inference
14
+ from .tartan import TartanAirStream
15
+ from .tartan import TartanAirTestStream
16
+
17
+ def dataset_factory(dataset_list, **kwargs):
18
+ """ create a combined dataset """
19
+
20
+ from torch.utils.data import ConcatDataset
21
+
22
+ dataset_map = { 'tartan': (TartanAir, ) }
23
+ db_list = []
24
+ for key in dataset_list:
25
+ # cache datasets for faster future loading
26
+ db = dataset_map[key][0](**kwargs)
27
+
28
+ print("Dataset {} has {} images".format(key, len(db)))
29
+ db_list.append(db)
30
+
31
+ return ConcatDataset(db_list)
32
+
33
+
34
+ def create_datastream(dataset_path, **kwargs):
35
+ """ create data_loader to stream images 1 by 1 """
36
+
37
+ from torch.utils.data import DataLoader
38
+
39
+ if osp.isfile(osp.join(dataset_path, 'calibration.txt')):
40
+ db = ETH3DStream(dataset_path, **kwargs)
41
+
42
+ elif osp.isdir(osp.join(dataset_path, 'image_left')):
43
+ db = TartanAirStream(dataset_path, **kwargs)
44
+
45
+ elif osp.isfile(osp.join(dataset_path, 'rgb.txt')):
46
+ db = TUMStream(dataset_path, **kwargs)
47
+
48
+ elif osp.isdir(osp.join(dataset_path, 'mav0')):
49
+ db = EurocStream(dataset_path, **kwargs)
50
+
51
+ elif osp.isfile(osp.join(dataset_path, 'calib.txt')):
52
+ db = KITTIStream(dataset_path, **kwargs)
53
+
54
+ else:
55
+ # db = TartanAirStream(dataset_path, **kwargs)
56
+ db = TartanAirTestStream(dataset_path, **kwargs)
57
+
58
+ stream = DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
59
+ return stream
60
+
61
+
62
+ def create_imagestream(dataset_path, **kwargs):
63
+ """ create data_loader to stream images 1 by 1 """
64
+ from torch.utils.data import DataLoader
65
+
66
+ db = ImageStream(dataset_path, **kwargs)
67
+ return DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
68
+
69
+ def create_stereostream(dataset_path, **kwargs):
70
+ """ create data_loader to stream images 1 by 1 """
71
+ from torch.utils.data import DataLoader
72
+
73
+ db = StereoStream(dataset_path, **kwargs)
74
+ return DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
75
+
76
+ def create_rgbdstream(dataset_path, **kwargs):
77
+ """ create data_loader to stream images 1 by 1 """
78
+ from torch.utils.data import DataLoader
79
+
80
+ db = RGBDStream(dataset_path, **kwargs)
81
+ return DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
82
+
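A hedged usage sketch for `dataset_factory` above (not one of the committed files). `'tartan'` is the only key registered in `dataset_map`, and keyword arguments are forwarded through `TartanAir` to `RGBDDataset.__init__`; the concrete values and the `sys.path` assumption below are illustrative, not the training defaults.

```python
# Illustrative sketch (not one of the committed files): build a training set with
# dataset_factory(); kwargs are forwarded to RGBDDataset.__init__. Assumes droid_slam/
# is on sys.path and the TartanAir data is on disk under datasets/TartanAir.
from torch.utils.data import DataLoader
from data_readers.factory import dataset_factory

db = dataset_factory(['tartan'], datapath='datasets/TartanAir',
                     n_frames=7, fmin=8.0, fmax=96.0)     # example values, not defaults
loader = DataLoader(db, batch_size=1, shuffle=True, num_workers=2)
# each item: (images, poses, disps, intrinsics) from RGBDDataset.__getitem__
```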
thirdparty/DROID-SLAM/droid_slam/data_readers/rgbd_utils.py ADDED
@@ -0,0 +1,190 @@
1
+ import numpy as np
2
+ import os.path as osp
3
+
4
+ import torch
5
+ from lietorch import SE3
6
+
7
+ import geom.projective_ops as pops
8
+ from scipy.spatial.transform import Rotation
9
+
10
+
11
+ def parse_list(filepath, skiprows=0):
12
+ """ read list data """
13
+ data = np.loadtxt(filepath, delimiter=' ', dtype=np.unicode_, skiprows=skiprows)
14
+ return data
15
+
16
+ def associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=1.0):
17
+ """ pair images, depths, and poses """
18
+ associations = []
19
+ for i, t in enumerate(tstamp_image):
20
+ if tstamp_pose is None:
21
+ j = np.argmin(np.abs(tstamp_depth - t))
22
+ if (np.abs(tstamp_depth[j] - t) < max_dt):
23
+ associations.append((i, j))
24
+
25
+ else:
26
+ j = np.argmin(np.abs(tstamp_depth - t))
27
+ k = np.argmin(np.abs(tstamp_pose - t))
28
+
29
+ if (np.abs(tstamp_depth[j] - t) < max_dt) and \
30
+ (np.abs(tstamp_pose[k] - t) < max_dt):
31
+ associations.append((i, j, k))
32
+
33
+ return associations
34
+
35
+ def loadtum(datapath, frame_rate=-1):
36
+ """ read video data in tum-rgbd format """
37
+ if osp.isfile(osp.join(datapath, 'groundtruth.txt')):
38
+ pose_list = osp.join(datapath, 'groundtruth.txt')
39
+
40
+ elif osp.isfile(osp.join(datapath, 'pose.txt')):
41
+ pose_list = osp.join(datapath, 'pose.txt')
42
+
43
+ else:
44
+ return None, None, None, None
45
+
46
+ image_list = osp.join(datapath, 'rgb.txt')
47
+ depth_list = osp.join(datapath, 'depth.txt')
48
+
49
+ calib_path = osp.join(datapath, 'calibration.txt')
50
+ intrinsic = None
51
+ if osp.isfile(calib_path):
52
+ intrinsic = np.loadtxt(calib_path, delimiter=' ')
53
+ intrinsic = intrinsic.astype(np.float64)
54
+
55
+ image_data = parse_list(image_list)
56
+ depth_data = parse_list(depth_list)
57
+ pose_data = parse_list(pose_list, skiprows=1)
58
+ pose_vecs = pose_data[:,1:].astype(np.float64)
59
+
60
+ tstamp_image = image_data[:,0].astype(np.float64)
61
+ tstamp_depth = depth_data[:,0].astype(np.float64)
62
+ tstamp_pose = pose_data[:,0].astype(np.float64)
63
+ associations = associate_frames(tstamp_image, tstamp_depth, tstamp_pose)
64
+
65
+ # print(len(tstamp_image))
66
+ # print(len(associations))
67
+
68
+ indicies = range(len(associations))[::5]
69
+
70
+ # indicies = [ 0 ]
71
+ # for i in range(1, len(associations)):
72
+ # t0 = tstamp_image[associations[indicies[-1]][0]]
73
+ # t1 = tstamp_image[associations[i][0]]
74
+ # if t1 - t0 > 1.0 / frame_rate:
75
+ # indicies += [ i ]
76
+
77
+ images, poses, depths, intrinsics, tstamps = [], [], [], [], []
78
+ for ix in indicies:
79
+ (i, j, k) = associations[ix]
80
+ images += [ osp.join(datapath, image_data[i,1]) ]
81
+ depths += [ osp.join(datapath, depth_data[j,1]) ]
82
+ poses += [ pose_vecs[k] ]
83
+ tstamps += [ tstamp_image[i] ]
84
+
85
+ if intrinsic is not None:
86
+ intrinsics += [ intrinsic ]
87
+
88
+ return images, depths, poses, intrinsics, tstamps
89
+
90
+
91
+ def all_pairs_distance_matrix(poses, beta=2.5):
92
+ """ compute distance matrix between all pairs of poses """
93
+ poses = np.array(poses, dtype=np.float32)
94
+ poses[:,:3] *= beta # scale to balance rot + trans
95
+ poses = SE3(torch.from_numpy(poses))
96
+
97
+ r = (poses[:,None].inv() * poses[None,:]).log()
98
+ return r.norm(dim=-1).cpu().numpy()
99
+
100
+ def pose_matrix_to_quaternion(pose):
101
+ """ convert 4x4 pose matrix to (t, q) """
102
+ q = Rotation.from_matrix(pose[:3, :3]).as_quat()
103
+ return np.concatenate([pose[:3, 3], q], axis=0)
104
+
105
+ def compute_distance_matrix_flow(poses, disps, intrinsics):
106
+ """ compute flow magnitude between all pairs of frames """
107
+ if not isinstance(poses, SE3):
108
+ poses = torch.from_numpy(poses).float().cuda()[None]
109
+ poses = SE3(poses).inv()
110
+
111
+ disps = torch.from_numpy(disps).float().cuda()[None]
112
+ intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]
113
+
114
+ N = poses.shape[1]
115
+
116
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
117
+ ii = ii.reshape(-1).cuda()
118
+ jj = jj.reshape(-1).cuda()
119
+
120
+ MAX_FLOW = 100.0
121
+ matrix = np.zeros((N, N), dtype=np.float32)
122
+
123
+ s = 2048
124
+ for i in range(0, ii.shape[0], s):
125
+ flow1, val1 = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
126
+ flow2, val2 = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s])
127
+
128
+ flow = torch.stack([flow1, flow2], dim=2)
129
+ val = torch.stack([val1, val2], dim=2)
130
+
131
+ mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
132
+ mag = mag.view(mag.shape[1], -1)
133
+ val = val.view(val.shape[1], -1)
134
+
135
+ mag = (mag * val).mean(-1) / val.mean(-1)
136
+ mag[val.mean(-1) < 0.7] = np.inf
137
+
138
+ i1 = ii[i:i+s].cpu().numpy()
139
+ j1 = jj[i:i+s].cpu().numpy()
140
+ matrix[i1, j1] = mag.cpu().numpy()
141
+
142
+ return matrix
143
+
144
+
145
+ def compute_distance_matrix_flow2(poses, disps, intrinsics, beta=0.4):
146
+ """ compute flow magnitude between all pairs of frames """
147
+ # if not isinstance(poses, SE3):
148
+ # poses = torch.from_numpy(poses).float().cuda()[None]
149
+ # poses = SE3(poses).inv()
150
+
151
+ # disps = torch.from_numpy(disps).float().cuda()[None]
152
+ # intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]
153
+
154
+ N = poses.shape[1]
155
+
156
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
157
+ ii = ii.reshape(-1)
158
+ jj = jj.reshape(-1)
159
+
160
+ MAX_FLOW = 128.0
161
+ matrix = np.zeros((N, N), dtype=np.float32)
162
+
163
+ s = 2048
164
+ for i in range(0, ii.shape[0], s):
165
+ flow1a, val1a = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s], tonly=True)
166
+ flow1b, val1b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
167
+ flow2a, val2a = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s], tonly=True)
168
+ flow2b, val2b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
169
+
170
+ flow1 = flow1a + beta * flow1b
171
+ val1 = val1a * val2b
172
+
173
+ flow2 = flow2a + beta * flow2b
174
+ val2 = val2a * val2b
175
+
176
+ flow = torch.stack([flow1, flow2], dim=2)
177
+ val = torch.stack([val1, val2], dim=2)
178
+
179
+ mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
180
+ mag = mag.view(mag.shape[1], -1)
181
+ val = val.view(val.shape[1], -1)
182
+
183
+ mag = (mag * val).mean(-1) / val.mean(-1)
184
+ mag[val.mean(-1) < 0.8] = np.inf
185
+
186
+ i1 = ii[i:i+s].cpu().numpy()
187
+ j1 = jj[i:i+s].cpu().numpy()
188
+ matrix[i1, j1] = mag.cpu().numpy()
189
+
190
+ return matrix
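A toy example (not one of the committed files) of the timestamp association performed by `associate_frames` above, which pairs each image with the nearest depth frame and pose within `max_dt`. The timestamps are invented and the expected output is worked out by hand from the code.

```python
# Illustrative sketch (not one of the committed files): nearest-timestamp association
# as done by associate_frames() above, with three images and a faster pose stream.
import numpy as np

tstamp_image = np.array([0.00, 0.10, 0.20])
tstamp_depth = np.array([0.01, 0.11, 0.21])
tstamp_pose  = np.array([0.00, 0.05, 0.10, 0.15, 0.20])

# associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=0.08)
# -> [(0, 0, 0), (1, 1, 2), (2, 2, 4)]   (image idx, depth idx, pose idx)
```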
thirdparty/DROID-SLAM/droid_slam/data_readers/stream.py ADDED
@@ -0,0 +1,234 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import torch.nn.functional as F
6
+
7
+ import csv
8
+ import os
9
+ import cv2
10
+ import math
11
+ import random
12
+ import json
13
+ import pickle
14
+ import os.path as osp
15
+
16
+ from .rgbd_utils import *
17
+
18
+ class RGBDStream(data.Dataset):
19
+ def __init__(self, datapath, frame_rate=-1, image_size=[384,512], crop_size=[0,0]):
20
+ self.datapath = datapath
21
+ self.frame_rate = frame_rate
22
+ self.image_size = image_size
23
+ self.crop_size = crop_size
24
+ self._build_dataset_index()
25
+
26
+ @staticmethod
27
+ def image_read(image_file):
28
+ return cv2.imread(image_file)
29
+
30
+ @staticmethod
31
+ def depth_read(depth_file):
32
+ return np.load(depth_file)
33
+
34
+ def __len__(self):
35
+ return len(self.images)
36
+
37
+ def __getitem__(self, index):
38
+ """ return training video """
39
+ image = self.__class__.image_read(self.images[index])
40
+ image = torch.from_numpy(image).float()
41
+ image = image.permute(2, 0, 1)
42
+
43
+ try:
44
+ tstamp = self.tstamps[index]
45
+ except:
46
+ tstamp = index
47
+
48
+ pose = torch.from_numpy(self.poses[index]).float()
49
+ intrinsic = torch.from_numpy(self.intrinsics[index]).float()
50
+
51
+ # resize image
52
+ sx = self.image_size[1] / image.shape[2]
53
+ sy = self.image_size[0] / image.shape[1]
54
+
55
+ image = F.interpolate(image[None], self.image_size, mode='bilinear', align_corners=False)[0]
56
+
57
+ fx, fy, cx, cy = intrinsic.unbind(dim=0)
58
+ fx, cx = sx * fx, sx * cx
59
+ fy, cy = sy * fy, sy * cy
60
+
61
+ # crop image
62
+ if self.crop_size[0] > 0:
63
+ cy = cy - self.crop_size[0]
64
+ image = image[:,self.crop_size[0]:-self.crop_size[0],:]
65
+
66
+ if self.crop_size[1] > 0:
67
+ cx = cx - self.crop_size[1]
68
+ image = image[:,:,self.crop_size[1]:-self.crop_size[1]]
69
+
70
+ intrinsic = torch.stack([fx, fy, cx, cy])
71
+
72
+ return tstamp, image, pose, intrinsic
73
+
74
+
75
+ class ImageStream(data.Dataset):
76
+ def __init__(self, datapath, intrinsics, rate=1, image_size=[384,512]):
77
+ rgb_list = osp.join(datapath, 'rgb.txt')
78
+ if os.path.isfile(rgb_list):
79
+ rgb_list = np.loadtxt(rgb_list, delimiter=' ', dtype=np.unicode_)
80
+ self.timestamps = rgb_list[:,0].astype(np.float)
81
+ self.images = [os.path.join(datapath, x) for x in rgb_list[:,1]]
82
+ self.images = self.images[::rate]
83
+ self.timestamps = self.timestamps[::rate]
84
+
85
+ else:
86
+ import glob
87
+ self.images = sorted(glob.glob(osp.join(datapath, '*.jpg'))) + sorted(glob.glob(osp.join(datapath, '*.png')))
88
+ self.images = self.images[::rate]
89
+
90
+ self.intrinsics = intrinsics
91
+ self.image_size = image_size
92
+
93
+ def __len__(self):
94
+ return len(self.images)
95
+
96
+ @staticmethod
97
+ def image_read(imfile):
98
+ return cv2.imread(imfile)
99
+
100
+ def __getitem__(self, index):
101
+ """ return training video """
102
+ image = self.__class__.image_read(self.images[index])
103
+
104
+ try:
105
+ tstamp = self.timestamps[index]
106
+ except:
107
+ tstamp = index
108
+
109
+ ht0, wd0 = image.shape[:2]
110
+ ht1, wd1 = self.image_size
111
+
112
+ intrinsics = torch.as_tensor(self.intrinsics)
113
+ intrinsics[0] *= wd1 / wd0
114
+ intrinsics[1] *= ht1 / ht0
115
+ intrinsics[2] *= wd1 / wd0
116
+ intrinsics[3] *= ht1 / ht0
117
+
118
+ # resize image
119
+ ikwargs = {'mode': 'bilinear', 'align_corners': True}
120
+ image = torch.from_numpy(image).float().permute(2, 0, 1)
121
+ image = F.interpolate(image[None], self.image_size, **ikwargs)[0]
122
+
123
+ return tstamp, image, intrinsics
124
+
125
+
126
+
127
+ class StereoStream(data.Dataset):
128
+ def __init__(self, datapath, intrinsics, rate=1, image_size=[384,512],
129
+ map_left=None, map_right=None, left_root='image_left', right_root='image_right'):
130
+ import glob
131
+ self.intrinsics = intrinsics
132
+ self.image_size = image_size
133
+
134
+ imgs = sorted(glob.glob(osp.join(datapath, left_root, '*.png')))[::rate]
135
+ self.images_l = []
136
+ self.images_r = []
137
+ self.tstamps = []
138
+
139
+ for img_l in imgs:
140
+ img_r = img_l.replace(left_root, right_root)
141
+ if os.path.isfile(img_r):
142
+ t = np.float(img_l.split('/')[-1].replace('.png', ''))
143
+ self.tstamps.append(t)
144
+ self.images_l += [ img_l ]
145
+ self.images_r += [ img_r ]
146
+
147
+ self.map_left = map_left
148
+ self.map_right = map_right
149
+
150
+ def __len__(self):
151
+ return len(self.images_l)
152
+
153
+ @staticmethod
154
+ def image_read(imfile, imap=None):
155
+ image = cv2.imread(imfile)
156
+ if imap is not None:
157
+ image = cv2.remap(image, imap[0], imap[1], interpolation=cv2.INTER_LINEAR)
158
+ return image
159
+
160
+ def __getitem__(self, index):
161
+ """ return training video """
162
+ tstamp = self.tstamps[index]
163
+ image_l = self.__class__.image_read(self.images_l[index], self.map_left)
164
+ image_r = self.__class__.image_read(self.images_r[index], self.map_right)
165
+
166
+ ht0, wd0 = image_l.shape[:2]
167
+ ht1, wd1 = self.image_size
168
+
169
+ intrinsics = torch.as_tensor(self.intrinsics)
170
+ intrinsics[0] *= wd1 / wd0
171
+ intrinsics[1] *= ht1 / ht0
172
+ intrinsics[2] *= wd1 / wd0
173
+ intrinsics[3] *= ht1 / ht0
174
+
175
+ image_l = torch.from_numpy(image_l).float().permute(2, 0, 1)
176
+ image_r = torch.from_numpy(image_r).float().permute(2, 0, 1)
177
+
178
+ # resize image
179
+ ikwargs = {'mode': 'bilinear', 'align_corners': True}
180
+ image_l = F.interpolate(image_l[None], self.image_size, **ikwargs)[0]
181
+ image_r = F.interpolate(image_r[None], self.image_size, **ikwargs)[0]
182
+
183
+ return tstamp, image_l, image_r, intrinsics
184
+
185
+
186
+
187
+ # class RGBDStream(data.Dataset):
188
+ # def __init__(self, datapath, intrinsics=None, rate=1, image_size=[384,512]):
189
+ # assoc_file = osp.join(datapath, 'associated.txt')
190
+ # assoc_list = np.loadtxt(assoc_file, delimiter=' ', dtype=np.unicode_)
191
+
192
+ # self.intrinsics = intrinsics
193
+ # self.image_size = image_size
194
+
195
+ # self.timestamps = assoc_list[:,0].astype(np.float)[::rate]
196
+ # self.images = [os.path.join(datapath, x) for x in assoc_list[:,1]][::rate]
197
+ # self.depths = [os.path.join(datapath, x) for x in assoc_list[:,3]][::rate]
198
+
199
+ # def __len__(self):
200
+ # return len(self.images)
201
+
202
+ # @staticmethod
203
+ # def image_read(imfile):
204
+ # return cv2.imread(imfile)
205
+
206
+ # @staticmethod
207
+ # def depth_read(depth_file):
208
+ # depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH)
209
+ # return depth.astype(np.float32) / 5000.0
210
+
211
+ # def __getitem__(self, index):
212
+ # """ return training video """
213
+ # tstamp = self.timestamps[index]
214
+ # image = self.__class__.image_read(self.images[index])
215
+ # depth = self.__class__.depth_read(self.depths[index])
216
+
217
+ # ht0, wd0 = image.shape[:2]
218
+ # ht1, wd1 = self.image_size
219
+
220
+ # intrinsics = torch.as_tensor(self.intrinsics)
221
+ # intrinsics[0] *= wd1 / wd0
222
+ # intrinsics[1] *= ht1 / ht0
223
+ # intrinsics[2] *= wd1 / wd0
224
+ # intrinsics[3] *= ht1 / ht0
225
+
226
+ # # resize image
227
+ # ikwargs = {'mode': 'bilinear', 'align_corners': True}
228
+ # image = torch.from_numpy(image).float().permute(2, 0, 1)
229
+ # image = F.interpolate(image[None], self.image_size, **ikwargs)[0]
230
+
231
+ # depth = torch.from_numpy(depth).float()[None,None]
232
+ # depth = F.interpolate(depth, self.image_size, mode='nearest').squeeze()
233
+
234
+ # return tstamp, image, depth, intrinsics
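The stream classes above all rescale pinhole intrinsics when an image is resized: `fx`/`cx` scale with the width ratio and `fy`/`cy` with the height ratio. A short worked example (not one of the committed files), using the TartanAir intrinsics that appear later in `tartan.py`:

```python
# Illustrative sketch (not one of the committed files): intrinsics rescaling as in
# ImageStream/StereoStream.__getitem__, resizing 640x480 input to the 384x512 default.
import torch

ht0, wd0 = 480, 640                                       # original image size
ht1, wd1 = 384, 512                                       # stream image_size default
intrinsics = torch.tensor([320.0, 320.0, 320.0, 240.0])   # fx fy cx cy (TartanAir)

intrinsics[0] *= wd1 / wd0   # fx scales with width
intrinsics[1] *= ht1 / ht0   # fy scales with height
intrinsics[2] *= wd1 / wd0   # cx scales with width
intrinsics[3] *= ht1 / ht0   # cy scales with height
print(intrinsics)            # tensor([256., 256., 256., 192.])
```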
thirdparty/DROID-SLAM/droid_slam/data_readers/tartan.py ADDED
@@ -0,0 +1,138 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import glob
5
+ import cv2
6
+ import os
7
+ import os.path as osp
8
+
9
+ from lietorch import SE3
10
+ from .base import RGBDDataset
11
+ from .stream import RGBDStream
12
+
13
+ cur_path = osp.dirname(osp.abspath(__file__))
14
+ test_split = osp.join(cur_path, 'tartan_test.txt')
15
+ test_split = open(test_split).read().split()
16
+
17
+
18
+ class TartanAir(RGBDDataset):
19
+
20
+ # scale depths to balance rot & trans
21
+ DEPTH_SCALE = 5.0
22
+
23
+ def __init__(self, mode='training', **kwargs):
24
+ self.mode = mode
25
+ self.n_frames = 2
26
+ super(TartanAir, self).__init__(name='TartanAir', **kwargs)
27
+
28
+ @staticmethod
29
+ def is_test_scene(scene):
30
+ # print(scene, any(x in scene for x in test_split))
31
+ return any(x in scene for x in test_split)
32
+
33
+ def _build_dataset(self):
34
+ from tqdm import tqdm
35
+ print("Building TartanAir dataset")
36
+
37
+ scene_info = {}
38
+ scenes = glob.glob(osp.join(self.root, '*/*/*/*'))
39
+ for scene in tqdm(sorted(scenes)):
40
+ images = sorted(glob.glob(osp.join(scene, 'image_left/*.png')))
41
+ depths = sorted(glob.glob(osp.join(scene, 'depth_left/*.npy')))
42
+
43
+ poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ')
44
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
45
+ poses[:,:3] /= TartanAir.DEPTH_SCALE
46
+ intrinsics = [TartanAir.calib_read()] * len(images)
47
+
48
+ # graph of co-visible frames based on flow
49
+ graph = self.build_frame_graph(poses, depths, intrinsics)
50
+
51
+ scene = '/'.join(scene.split('/'))
52
+ scene_info[scene] = {'images': images, 'depths': depths,
53
+ 'poses': poses, 'intrinsics': intrinsics, 'graph': graph}
54
+
55
+ return scene_info
56
+
57
+ @staticmethod
58
+ def calib_read():
59
+ return np.array([320.0, 320.0, 320.0, 240.0])
60
+
61
+ @staticmethod
62
+ def image_read(image_file):
63
+ return cv2.imread(image_file)
64
+
65
+ @staticmethod
66
+ def depth_read(depth_file):
67
+ depth = np.load(depth_file) / TartanAir.DEPTH_SCALE
68
+ depth[np.isnan(depth)] = 1.0
69
+ depth[depth==np.inf] = 1.0
70
+ return depth
71
+
72
+
73
+ class TartanAirStream(RGBDStream):
74
+ def __init__(self, datapath, **kwargs):
75
+ super(TartanAirStream, self).__init__(datapath=datapath, **kwargs)
76
+
77
+ def _build_dataset_index(self):
78
+ """ build list of images, poses, depths, and intrinsics """
79
+ self.root = 'datasets/TartanAir'
80
+
81
+ scene = osp.join(self.root, self.datapath)
82
+ image_glob = osp.join(scene, 'image_left/*.png')
83
+ images = sorted(glob.glob(image_glob))
84
+
85
+ poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ')
86
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
87
+
88
+ poses = SE3(torch.as_tensor(poses))
89
+ poses = poses[[0]].inv() * poses
90
+ poses = poses.data.cpu().numpy()
91
+
92
+ intrinsic = self.calib_read(self.datapath)
93
+ intrinsics = np.tile(intrinsic[None], (len(images), 1))
94
+
95
+ self.images = images[::int(self.frame_rate)]
96
+ self.poses = poses[::int(self.frame_rate)]
97
+ self.intrinsics = intrinsics[::int(self.frame_rate)]
98
+
99
+ @staticmethod
100
+ def calib_read(datapath):
101
+ return np.array([320.0, 320.0, 320.0, 240.0])
102
+
103
+ @staticmethod
104
+ def image_read(image_file):
105
+ return cv2.imread(image_file)
106
+
107
+
108
+ class TartanAirTestStream(RGBDStream):
109
+ def __init__(self, datapath, **kwargs):
110
+ super(TartanAirTestStream, self).__init__(datapath=datapath, **kwargs)
111
+
112
+ def _build_dataset_index(self):
113
+ """ build list of images, poses, depths, and intrinsics """
114
+ self.root = 'datasets/mono'
115
+ image_glob = osp.join(self.root, self.datapath, '*.png')
116
+ images = sorted(glob.glob(image_glob))
117
+
118
+ poses = np.loadtxt(osp.join(self.root, 'mono_gt', self.datapath + '.txt'), delimiter=' ')
119
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
120
+
121
+ poses = SE3(torch.as_tensor(poses))
122
+ poses = poses[[0]].inv() * poses
123
+ poses = poses.data.cpu().numpy()
124
+
125
+ intrinsic = self.calib_read(self.datapath)
126
+ intrinsics = np.tile(intrinsic[None], (len(images), 1))
127
+
128
+ self.images = images[::int(self.frame_rate)]
129
+ self.poses = poses[::int(self.frame_rate)]
130
+ self.intrinsics = intrinsics[::int(self.frame_rate)]
131
+
132
+ @staticmethod
133
+ def calib_read(datapath):
134
+ return np.array([320.0, 320.0, 320.0, 240.0])
135
+
136
+ @staticmethod
137
+ def image_read(image_file):
138
+ return cv2.imread(image_file)
thirdparty/DROID-SLAM/droid_slam/data_readers/tartan_test.txt ADDED
@@ -0,0 +1,32 @@
1
+ abandonedfactory/abandonedfactory/Easy/P011
2
+ abandonedfactory/abandonedfactory/Hard/P011
3
+ abandonedfactory_night/abandonedfactory_night/Easy/P013
4
+ abandonedfactory_night/abandonedfactory_night/Hard/P014
5
+ amusement/amusement/Easy/P008
6
+ amusement/amusement/Hard/P007
7
+ carwelding/carwelding/Easy/P007
8
+ endofworld/endofworld/Easy/P009
9
+ gascola/gascola/Easy/P008
10
+ gascola/gascola/Hard/P009
11
+ hospital/hospital/Easy/P036
12
+ hospital/hospital/Hard/P049
13
+ japanesealley/japanesealley/Easy/P007
14
+ japanesealley/japanesealley/Hard/P005
15
+ neighborhood/neighborhood/Easy/P021
16
+ neighborhood/neighborhood/Hard/P017
17
+ ocean/ocean/Easy/P013
18
+ ocean/ocean/Hard/P009
19
+ office2/office2/Easy/P011
20
+ office2/office2/Hard/P010
21
+ office/office/Hard/P007
22
+ oldtown/oldtown/Easy/P007
23
+ oldtown/oldtown/Hard/P008
24
+ seasidetown/seasidetown/Easy/P009
25
+ seasonsforest/seasonsforest/Easy/P011
26
+ seasonsforest/seasonsforest/Hard/P006
27
+ seasonsforest_winter/seasonsforest_winter/Easy/P009
28
+ seasonsforest_winter/seasonsforest_winter/Hard/P018
29
+ soulcity/soulcity/Easy/P012
30
+ soulcity/soulcity/Hard/P009
31
+ westerndesert/westerndesert/Easy/P013
32
+ westerndesert/westerndesert/Hard/P007
thirdparty/DROID-SLAM/droid_slam/depth_video.py ADDED
@@ -0,0 +1,197 @@
1
+ import numpy as np
2
+ import torch
3
+ import lietorch
4
+ import droid_backends
5
+
6
+ from torch.multiprocessing import Process, Queue, Lock, Value
7
+ from collections import OrderedDict
8
+
9
+ from droid_net import cvx_upsample
10
+ import geom.projective_ops as pops
11
+
12
+ class DepthVideo:
13
+ def __init__(self, image_size=[480, 640], buffer=1024, stereo=False, device="cuda:0"):
14
+
15
+ # current keyframe count
16
+ self.counter = Value('i', 0)
17
+ self.ready = Value('i', 0)
18
+ self.ht = ht = image_size[0]
19
+ self.wd = wd = image_size[1]
20
+
21
+ ### state attributes ###
22
+ self.tstamp = torch.zeros(buffer, device="cuda", dtype=torch.float).share_memory_()
23
+ self.images = torch.zeros(buffer, 3, ht, wd, device="cuda", dtype=torch.uint8)
24
+ self.dirty = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
25
+ self.red = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
26
+ self.poses = torch.zeros(buffer, 7, device="cuda", dtype=torch.float).share_memory_()
27
+ self.disps = torch.ones(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
28
+ self.disps_sens = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
29
+ self.disps_up = torch.zeros(buffer, ht, wd, device="cuda", dtype=torch.float).share_memory_()
30
+ self.intrinsics = torch.zeros(buffer, 4, device="cuda", dtype=torch.float).share_memory_()
31
+
32
+ self.masks = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
33
+ self.stereo = stereo
34
+ c = 1 if not self.stereo else 2
35
+
36
+ ### feature attributes ###
37
+ self.fmaps = torch.zeros(buffer, c, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
38
+ self.nets = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
39
+ self.inps = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
40
+
41
+ # initialize poses to identity transformation
42
+ self.poses[:] = torch.as_tensor([0, 0, 0, 0, 0, 0, 1], dtype=torch.float, device="cuda")
43
+
44
+ def get_lock(self):
45
+ return self.counter.get_lock()
46
+
47
+ def __item_setter(self, index, item):
48
+ if isinstance(index, int) and index >= self.counter.value:
49
+ self.counter.value = index + 1
50
+
51
+ elif isinstance(index, torch.Tensor) and index.max().item() > self.counter.value:
52
+ self.counter.value = index.max().item() + 1
53
+
54
+ # self.dirty[index] = True
55
+ self.tstamp[index] = item[0]
56
+ self.images[index] = item[1]
57
+
58
+ if item[2] is not None:
59
+ self.poses[index] = item[2]
60
+
61
+ if item[3] is not None:
62
+ self.disps[index] = item[3]
63
+
64
+ if item[4] is not None:
65
+ depth = item[4][3::8,3::8]
66
+ self.disps_sens[index] = torch.where(depth>0, 1.0/depth, depth)
67
+
68
+ if item[5] is not None:
69
+ self.intrinsics[index] = item[5]
70
+
71
+ if len(item) > 6:
72
+ self.fmaps[index] = item[6]
73
+
74
+ if len(item) > 7:
75
+ self.nets[index] = item[7]
76
+
77
+ if len(item) > 8:
78
+ self.inps[index] = item[8]
79
+
80
+ if len(item) > 9:
81
+ self.masks[index] = item[9]
82
+
83
+ def __setitem__(self, index, item):
84
+ with self.get_lock():
85
+ self.__item_setter(index, item)
86
+
87
+ def __getitem__(self, index):
88
+ """ index the depth video """
89
+
90
+ with self.get_lock():
91
+ # support negative indexing
92
+ if isinstance(index, int) and index < 0:
93
+ index = self.counter.value + index
94
+
95
+ item = (
96
+ self.poses[index],
97
+ self.disps[index],
98
+ self.intrinsics[index],
99
+ self.fmaps[index],
100
+ self.nets[index],
101
+ self.inps[index])
102
+
103
+ return item
104
+
105
+ def append(self, *item):
106
+ with self.get_lock():
107
+ self.__item_setter(self.counter.value, item)
108
+
109
+
110
+ ### geometric operations ###
111
+
112
+ @staticmethod
113
+ def format_indicies(ii, jj):
114
+ """ to device, long, {-1} """
115
+
116
+ if not isinstance(ii, torch.Tensor):
117
+ ii = torch.as_tensor(ii)
118
+
119
+ if not isinstance(jj, torch.Tensor):
120
+ jj = torch.as_tensor(jj)
121
+
122
+ ii = ii.to(device="cuda", dtype=torch.long).reshape(-1)
123
+ jj = jj.to(device="cuda", dtype=torch.long).reshape(-1)
124
+
125
+ return ii, jj
126
+
127
+ def upsample(self, ix, mask):
128
+ """ upsample disparity """
129
+
130
+ disps_up = cvx_upsample(self.disps[ix].unsqueeze(-1), mask)
131
+ self.disps_up[ix] = disps_up.squeeze()
132
+
133
+ def normalize(self):
134
+ """ normalize depth and poses """
135
+
136
+ with self.get_lock():
137
+ s = self.disps[:self.counter.value].mean()
138
+ self.disps[:self.counter.value] /= s
139
+ self.poses[:self.counter.value,:3] *= s
140
+ self.dirty[:self.counter.value] = True
141
+
142
+
143
+ def reproject(self, ii, jj):
144
+ """ project points from ii -> jj """
145
+ ii, jj = DepthVideo.format_indicies(ii, jj)
146
+ Gs = lietorch.SE3(self.poses[None])
147
+
148
+ coords, valid_mask = \
149
+ pops.projective_transform(Gs, self.disps[None], self.intrinsics[None], ii, jj)
150
+
151
+ return coords, valid_mask
152
+
153
+ def distance(self, ii=None, jj=None, beta=0.3, bidirectional=True):
154
+ """ frame distance metric """
155
+
156
+ return_matrix = False
157
+ if ii is None:
158
+ return_matrix = True
159
+ N = self.counter.value
160
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
161
+
162
+ ii, jj = DepthVideo.format_indicies(ii, jj)
163
+
164
+ if bidirectional:
165
+
166
+ poses = self.poses[:self.counter.value].clone()
167
+
168
+ d1 = droid_backends.frame_distance(
169
+ poses, self.disps, self.intrinsics[0], ii, jj, beta)
170
+
171
+ d2 = droid_backends.frame_distance(
172
+ poses, self.disps, self.intrinsics[0], jj, ii, beta)
173
+
174
+ d = .5 * (d1 + d2)
175
+
176
+ else:
177
+ d = droid_backends.frame_distance(
178
+ self.poses, self.disps, self.intrinsics[0], ii, jj, beta)
179
+
180
+ if return_matrix:
181
+ return d.reshape(N, N)
182
+
183
+ return d
184
+
185
+ def ba(self, target, weight, eta, ii, jj, t0=1, t1=None, itrs=2, lm=1e-4, ep=0.1, motion_only=False):
186
+ """ dense bundle adjustment (DBA) """
187
+
188
+ with self.get_lock():
189
+
190
+ # [t0, t1] window of bundle adjustment optimization
191
+ if t1 is None:
192
+ t1 = max(ii.max().item(), jj.max().item()) + 1
193
+
194
+ droid_backends.ba(self.poses, self.disps, self.intrinsics[0], self.disps_sens,
195
+ target, weight, eta, ii, jj, t0, t1, itrs, lm, ep, motion_only)
196
+
197
+ self.disps.clamp_(min=0.001)
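For orientation, a minimal sketch of how a frame could be pushed into this buffer and read back. The buffer size, tensor shapes, and intrinsics below are illustrative assumptions (and the droid_backends CUDA extension must be built for the import to succeed); the positional argument order simply follows __item_setter above.

    import torch
    from depth_video import DepthVideo

    video = DepthVideo(image_size=[480, 640], buffer=512)   # illustrative buffer size

    image = torch.zeros(3, 480, 640, dtype=torch.uint8, device="cuda")
    intrinsics = torch.as_tensor([320.0, 320.0, 320.0, 240.0], device="cuda")

    # positional layout: (tstamp, image, pose, disp, depth, intrinsics, ...)
    video.append(0.0, image, None, None, None, intrinsics)

    # __getitem__ returns (pose, disp, intrinsics, fmap, net, inp) for a keyframe index
    pose, disp, intr, fmap, net, inp = video[0]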
thirdparty/DROID-SLAM/droid_slam/droid.py ADDED
@@ -0,0 +1,102 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ from droid_net import DroidNet
6
+ from depth_video import DepthVideo
7
+ from motion_filter import MotionFilter
8
+ from droid_frontend import DroidFrontend
9
+ from droid_backend import DroidBackend
10
+ from trajectory_filler import PoseTrajectoryFiller
11
+
12
+ from collections import OrderedDict
13
+ from torch.multiprocessing import Process
14
+
15
+
16
+ class Droid:
17
+ def __init__(self, args):
18
+ super(Droid, self).__init__()
19
+ self.load_weights(args.weights)
20
+ self.args = args
21
+ self.disable_vis = args.disable_vis
22
+
23
+ # store images, depth, poses, intrinsics (shared between processes)
24
+ self.video = DepthVideo(args.image_size, args.buffer, stereo=args.stereo)
25
+
26
+ # filter incoming frames so that there is enough motion
27
+ self.filterx = MotionFilter(self.net, self.video, thresh=args.filter_thresh)
28
+
29
+ # frontend process
30
+ self.frontend = DroidFrontend(self.net, self.video, self.args)
31
+
32
+ # backend process
33
+ self.backend = DroidBackend(self.net, self.video, self.args)
34
+
35
+ # visualizer
36
+ if not self.disable_vis:
37
+ # from visualization import droid_visualization
38
+ from vis_headless import droid_visualization
39
+ print('Using headless ...')
40
+ self.visualizer = Process(target=droid_visualization, args=(self.video, '.'))
41
+ self.visualizer.start()
42
+
43
+ # post processor - fill in poses for non-keyframes
44
+ self.traj_filler = PoseTrajectoryFiller(self.net, self.video)
45
+
46
+
47
+ def load_weights(self, weights):
48
+ """ load trained model weights """
49
+
50
+ self.net = DroidNet()
51
+ state_dict = OrderedDict([
52
+ (k.replace("module.", ""), v) for (k, v) in torch.load(weights).items()])
53
+
54
+ state_dict["update.weight.2.weight"] = state_dict["update.weight.2.weight"][:2]
55
+ state_dict["update.weight.2.bias"] = state_dict["update.weight.2.bias"][:2]
56
+ state_dict["update.delta.2.weight"] = state_dict["update.delta.2.weight"][:2]
57
+ state_dict["update.delta.2.bias"] = state_dict["update.delta.2.bias"][:2]
58
+
59
+ self.net.load_state_dict(state_dict)
60
+ self.net.to("cuda:0").eval()
61
+
62
+ def track(self, tstamp, image, depth=None, intrinsics=None, mask=None):
63
+ """ main thread - update map """
64
+
65
+ with torch.no_grad():
66
+ # check there is enough motion
67
+ self.filterx.track(tstamp, image, depth, intrinsics, mask)
68
+
69
+ # local bundle adjustment
70
+ self.frontend()
71
+
72
+ # global bundle adjustment
73
+ # self.backend()
74
+
75
+ def terminate(self, stream=None, backend=True):
76
+ """ terminate the visualization process, return poses [t, q] """
77
+
78
+ del self.frontend
79
+
80
+ if backend:
81
+ torch.cuda.empty_cache()
82
+ # print("#" * 32)
83
+ self.backend(7)
84
+
85
+ torch.cuda.empty_cache()
86
+ # print("#" * 32)
87
+ self.backend(12)
88
+
89
+ camera_trajectory = self.traj_filler(stream)
90
+ return camera_trajectory.inv().data.cpu().numpy()
91
+
92
+ def compute_error(self):
93
+ """ compute slam reprojection error """
94
+
95
+ del self.frontend
96
+
97
+ torch.cuda.empty_cache()
98
+ self.backend(12)
99
+
100
+ return self.backend.errors[-1]
101
+
102
+
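A rough sketch of the intended calling pattern, for readers skimming the diff. The image_stream generator and the args namespace are placeholders invented here (the repository's demo and evaluation scripts build the real ones); only the Droid methods used below are taken from the class above.

    import torch
    from droid import Droid

    def image_stream():
        # placeholder: a real stream yields (tstamp, image[1,3,H,W] uint8, intrinsics[4])
        intrinsics = torch.as_tensor([320.0, 320.0, 320.0, 240.0])
        for t in range(200):
            yield t, torch.zeros(1, 3, 480, 640, dtype=torch.uint8), intrinsics

    droid = Droid(args)                        # args as parsed by the calling script
    for t, image, intrinsics in image_stream():
        droid.track(t, image, intrinsics=intrinsics)

    traj = droid.terminate(image_stream())     # [t, q] pose for every input frame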
thirdparty/DROID-SLAM/droid_slam/droid_backend.py ADDED
@@ -0,0 +1,52 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ from lietorch import SE3
6
+ from factor_graph import FactorGraph
7
+
8
+
9
+ class DroidBackend:
10
+ def __init__(self, net, video, args):
11
+ self.video = video
12
+ self.update_op = net.update
13
+
14
+ # global optimization window
15
+ self.t0 = 0
16
+ self.t1 = 0
17
+
18
+ self.upsample = args.upsample
19
+ self.beta = args.beta
20
+ self.backend_thresh = args.backend_thresh
21
+ self.backend_radius = args.backend_radius
22
+ self.backend_nms = args.backend_nms
23
+ self.errors = []
24
+
25
+ @torch.no_grad()
26
+ def __call__(self, steps=12):
27
+ """ main update """
28
+
29
+ t = self.video.counter.value
30
+ if not self.video.stereo and not torch.any(self.video.disps_sens):
31
+ self.video.normalize()
32
+
33
+ graph = FactorGraph(self.video, self.update_op, corr_impl="alt", max_factors=16*t, upsample=self.upsample)
34
+
35
+ graph.add_proximity_factors(rad=self.backend_radius,
36
+ nms=self.backend_nms,
37
+ thresh=self.backend_thresh,
38
+ beta=self.beta)
39
+
40
+ graph.update_lowmem(steps=steps)
41
+ self.errors.append(self.cal_err(graph))
42
+ graph.clear_edges()
43
+ self.video.dirty[:t] = True
44
+
45
+ return
46
+
47
+ def cal_err(self, graph):
48
+ coord, _ = graph.video.reproject(graph.ii, graph.jj)
49
+ diff = graph.target - coord
50
+ err = diff.norm(dim=-1).mean().item()
51
+ return err
52
+
thirdparty/DROID-SLAM/droid_slam/droid_frontend.py ADDED
@@ -0,0 +1,119 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ from lietorch import SE3
6
+ from factor_graph import FactorGraph
7
+
8
+
9
+ class DroidFrontend:
10
+ def __init__(self, net, video, args):
11
+ self.video = video
12
+ self.update_op = net.update
13
+ self.graph = FactorGraph(video, net.update, max_factors=48, upsample=args.upsample)
14
+
15
+ # local optimization window
16
+ self.t0 = 0
17
+ self.t1 = 0
18
+
19
+ # frontend variables
20
+ self.is_initialized = False
21
+ self.count = 0
22
+
23
+ self.max_age = 25
24
+ self.iters1 = 4
25
+ self.iters2 = 2
26
+
27
+ self.warmup = args.warmup
28
+ self.beta = args.beta
29
+ self.frontend_nms = args.frontend_nms
30
+ self.keyframe_thresh = args.keyframe_thresh
31
+ self.frontend_window = args.frontend_window
32
+ self.frontend_thresh = args.frontend_thresh
33
+ self.frontend_radius = args.frontend_radius
34
+
35
+ def __update(self):
36
+ """ add edges, perform update """
37
+
38
+ self.count += 1
39
+ self.t1 += 1
40
+
41
+ if self.graph.corr is not None:
42
+ self.graph.rm_factors(self.graph.age > self.max_age, store=True)
43
+
44
+ self.graph.add_proximity_factors(self.t1-5, max(self.t1-self.frontend_window, 0),
45
+ rad=self.frontend_radius, nms=self.frontend_nms, thresh=self.frontend_thresh, beta=self.beta, remove=True)
46
+
47
+ self.video.disps[self.t1-1] = torch.where(self.video.disps_sens[self.t1-1] > 0,
48
+ self.video.disps_sens[self.t1-1], self.video.disps[self.t1-1])
49
+
50
+ for itr in range(self.iters1):
51
+ self.graph.update(None, None, use_inactive=True)
52
+
53
+ # set initial pose for next frame
54
+ poses = SE3(self.video.poses)
55
+ d = self.video.distance([self.t1-3], [self.t1-2], beta=self.beta, bidirectional=True)
56
+
57
+ if d.item() < self.keyframe_thresh:
58
+ self.graph.rm_keyframe(self.t1 - 2)
59
+
60
+ with self.video.get_lock():
61
+ self.video.counter.value -= 1
62
+ self.t1 -= 1
63
+
64
+ else:
65
+ for itr in range(self.iters2):
66
+ self.graph.update(None, None, use_inactive=True)
67
+
68
+ # set pose for next iteration
69
+ self.video.poses[self.t1] = self.video.poses[self.t1-1]
70
+ self.video.disps[self.t1] = self.video.disps[self.t1-1].mean()
71
+
72
+ # update visualization
73
+ self.video.dirty[self.graph.ii.min():self.t1] = True
74
+
75
+ def __initialize(self):
76
+ """ initialize the SLAM system """
77
+
78
+ self.t0 = 0
79
+ self.t1 = self.video.counter.value
80
+
81
+ self.graph.add_neighborhood_factors(self.t0, self.t1, r=3)
82
+
83
+ for itr in range(8):
84
+ self.graph.update(1, use_inactive=True)
85
+
86
+ self.graph.add_proximity_factors(0, 0, rad=2, nms=2, thresh=self.frontend_thresh, remove=False)
87
+
88
+ for itr in range(8):
89
+ self.graph.update(1, use_inactive=True)
90
+
91
+
92
+ # self.video.normalize()
93
+ self.video.poses[self.t1] = self.video.poses[self.t1-1].clone()
94
+ self.video.disps[self.t1] = self.video.disps[self.t1-4:self.t1].mean()
95
+
96
+ # initialization complete
97
+ self.is_initialized = True
98
+ self.last_pose = self.video.poses[self.t1-1].clone()
99
+ self.last_disp = self.video.disps[self.t1-1].clone()
100
+ self.last_time = self.video.tstamp[self.t1-1].clone()
101
+
102
+ with self.video.get_lock():
103
+ self.video.ready.value = 1
104
+ self.video.dirty[:self.t1] = True
105
+
106
+ self.graph.rm_factors(self.graph.ii < self.warmup-4, store=True)
107
+
108
+ def __call__(self):
109
+ """ main update """
110
+
111
+ # do initialization
112
+ if not self.is_initialized and self.video.counter.value == self.warmup:
113
+ self.__initialize()
114
+
115
+ # do update
116
+ elif self.is_initialized and self.t1 < self.video.counter.value:
117
+ self.__update()
118
+
119
+
thirdparty/DROID-SLAM/droid_slam/droid_net.py ADDED
@@ -0,0 +1,226 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from collections import OrderedDict
6
+
7
+ from modules.extractor import BasicEncoder
8
+ from modules.corr import CorrBlock
9
+ from modules.gru import ConvGRU
10
+ from modules.clipping import GradientClip
11
+
12
+ from lietorch import SE3
13
+ from geom.ba import BA
14
+
15
+ import geom.projective_ops as pops
16
+ from geom.graph_utils import graph_to_edge_list, keyframe_indicies
17
+
18
+ from torch_scatter import scatter_mean
19
+
20
+
21
+ def cvx_upsample(data, mask):
22
+ """ upsample pixel-wise transformation field """
23
+ batch, ht, wd, dim = data.shape
24
+ data = data.permute(0, 3, 1, 2)
25
+ mask = mask.view(batch, 1, 9, 8, 8, ht, wd)
26
+ mask = torch.softmax(mask, dim=2)
27
+
28
+ up_data = F.unfold(data, [3,3], padding=1)
29
+ up_data = up_data.view(batch, dim, 9, 1, 1, ht, wd)
30
+
31
+ up_data = torch.sum(mask * up_data, dim=2)
32
+ up_data = up_data.permute(0, 4, 2, 5, 3, 1)
33
+ up_data = up_data.reshape(batch, 8*ht, 8*wd, dim)
34
+
35
+ return up_data
36
+
37
+ def upsample_disp(disp, mask):
38
+ batch, num, ht, wd = disp.shape
39
+ disp = disp.view(batch*num, ht, wd, 1)
40
+ mask = mask.view(batch*num, -1, ht, wd)
41
+ return cvx_upsample(disp, mask).view(batch, num, 8*ht, 8*wd)
42
+
43
+
44
+ class GraphAgg(nn.Module):
45
+ def __init__(self):
46
+ super(GraphAgg, self).__init__()
47
+ self.conv1 = nn.Conv2d(128, 128, 3, padding=1)
48
+ self.conv2 = nn.Conv2d(128, 128, 3, padding=1)
49
+ self.relu = nn.ReLU(inplace=True)
50
+
51
+ self.eta = nn.Sequential(
52
+ nn.Conv2d(128, 1, 3, padding=1),
53
+ GradientClip(),
54
+ nn.Softplus())
55
+
56
+ self.upmask = nn.Sequential(
57
+ nn.Conv2d(128, 8*8*9, 1, padding=0))
58
+
59
+ def forward(self, net, ii):
60
+ batch, num, ch, ht, wd = net.shape
61
+ net = net.view(batch*num, ch, ht, wd)
62
+
63
+ _, ix = torch.unique(ii, return_inverse=True)
64
+ net = self.relu(self.conv1(net))
65
+
66
+ net = net.view(batch, num, 128, ht, wd)
67
+ net = scatter_mean(net, ix, dim=1)
68
+ net = net.view(-1, 128, ht, wd)
69
+
70
+ net = self.relu(self.conv2(net))
71
+
72
+ eta = self.eta(net).view(batch, -1, ht, wd)
73
+ upmask = self.upmask(net).view(batch, -1, 8*8*9, ht, wd)
74
+
75
+ return .01 * eta, upmask
76
+
77
+
78
+ class UpdateModule(nn.Module):
79
+ def __init__(self):
80
+ super(UpdateModule, self).__init__()
81
+ cor_planes = 4 * (2*3 + 1)**2
82
+
83
+ self.corr_encoder = nn.Sequential(
84
+ nn.Conv2d(cor_planes, 128, 1, padding=0),
85
+ nn.ReLU(inplace=True),
86
+ nn.Conv2d(128, 128, 3, padding=1),
87
+ nn.ReLU(inplace=True))
88
+
89
+ self.flow_encoder = nn.Sequential(
90
+ nn.Conv2d(4, 128, 7, padding=3),
91
+ nn.ReLU(inplace=True),
92
+ nn.Conv2d(128, 64, 3, padding=1),
93
+ nn.ReLU(inplace=True))
94
+
95
+ self.weight = nn.Sequential(
96
+ nn.Conv2d(128, 128, 3, padding=1),
97
+ nn.ReLU(inplace=True),
98
+ nn.Conv2d(128, 2, 3, padding=1),
99
+ GradientClip(),
100
+ nn.Sigmoid())
101
+
102
+ self.delta = nn.Sequential(
103
+ nn.Conv2d(128, 128, 3, padding=1),
104
+ nn.ReLU(inplace=True),
105
+ nn.Conv2d(128, 2, 3, padding=1),
106
+ GradientClip())
107
+
108
+ self.gru = ConvGRU(128, 128+128+64)
109
+ self.agg = GraphAgg()
110
+
111
+ def forward(self, net, inp, corr, flow=None, ii=None, jj=None, mask=None):
112
+ """ RaftSLAM update operator """
113
+
114
+ batch, num, ch, ht, wd = net.shape
115
+
116
+ if flow is None:
117
+ flow = torch.zeros(batch, num, 4, ht, wd, device=net.device)
118
+
119
+ output_dim = (batch, num, -1, ht, wd)
120
+ net = net.view(batch*num, -1, ht, wd)
121
+ inp = inp.view(batch*num, -1, ht, wd)
122
+ corr = corr.view(batch*num, -1, ht, wd)
123
+ flow = flow.view(batch*num, -1, ht, wd)
124
+
125
+ corr = self.corr_encoder(corr)
126
+ flow = self.flow_encoder(flow)
127
+ net = self.gru(net, inp, corr, flow)
128
+
129
+ ### update variables ###
130
+ delta = self.delta(net).view(*output_dim)
131
+ weight = self.weight(net).view(*output_dim)
132
+
133
+ # print('Update')
134
+ # print('delta:', delta.shape) # [1,1,2,64,48]
135
+ # print('weight:', weight.shape) # [1,1,2,64,48]
136
+
137
+ delta = delta.permute(0,1,3,4,2)[...,:2].contiguous()
138
+ weight = weight.permute(0,1,3,4,2)[...,:2].contiguous()
139
+
140
+ net = net.view(*output_dim)
141
+
142
+ if ii is not None:
143
+ eta, upmask = self.agg(net, ii.to(net.device))
144
+ return net, delta, weight, eta, upmask
145
+
146
+ else:
147
+ return net, delta, weight
148
+
149
+
150
+ class DroidNet(nn.Module):
151
+ def __init__(self):
152
+ super(DroidNet, self).__init__()
153
+ self.fnet = BasicEncoder(output_dim=128, norm_fn='instance')
154
+ self.cnet = BasicEncoder(output_dim=256, norm_fn='none')
155
+ self.update = UpdateModule()
156
+
157
+
158
+ def extract_features(self, images):
159
+ """ run feature extraction networks """
160
+
161
+ # normalize images
162
+ images = images[:, :, [2,1,0]] / 255.0
163
+ mean = torch.as_tensor([0.485, 0.456, 0.406], device=images.device)
164
+ std = torch.as_tensor([0.229, 0.224, 0.225], device=images.device)
165
+ images = images.sub_(mean[:, None, None]).div_(std[:, None, None])
166
+
167
+ fmaps = self.fnet(images)
168
+ net = self.cnet(images)
169
+
170
+ net, inp = net.split([128,128], dim=2)
171
+ net = torch.tanh(net)
172
+ inp = torch.relu(inp)
173
+ return fmaps, net, inp
174
+
175
+
176
+ def forward(self, Gs, images, disps, intrinsics, graph=None, num_steps=12, fixedp=2):
177
+ """ Estimates SE3 or Sim3 between pair of frames """
178
+
179
+ u = keyframe_indicies(graph)
180
+ ii, jj, kk = graph_to_edge_list(graph)
181
+
182
+ ii = ii.to(device=images.device, dtype=torch.long)
183
+ jj = jj.to(device=images.device, dtype=torch.long)
184
+
185
+ fmaps, net, inp = self.extract_features(images)
186
+ net, inp = net[:,ii], inp[:,ii]
187
+ corr_fn = CorrBlock(fmaps[:,ii], fmaps[:,jj], num_levels=4, radius=3)
188
+
189
+ ht, wd = images.shape[-2:]
190
+ coords0 = pops.coords_grid(ht//8, wd//8, device=images.device)
191
+
192
+ coords1, _ = pops.projective_transform(Gs, disps, intrinsics, ii, jj)
193
+ target = coords1.clone()
194
+
195
+ Gs_list, disp_list, residual_list = [], [], []
196
+ for step in range(num_steps):
197
+ Gs = Gs.detach()
198
+ disps = disps.detach()
199
+ coords1 = coords1.detach()
200
+ target = target.detach()
201
+
202
+ # extract motion features
203
+ corr = corr_fn(coords1)
204
+ resd = target - coords1
205
+ flow = coords1 - coords0
206
+
207
+ motion = torch.cat([flow, resd], dim=-1)
208
+ motion = motion.permute(0,1,4,2,3).clamp(-64.0, 64.0)
209
+
210
+ net, delta, weight, eta, upmask = \
211
+ self.update(net, inp, corr, motion, ii, jj)
212
+
213
+ target = coords1 + delta
214
+
215
+ for i in range(2):
216
+ Gs, disps = BA(target, weight, eta, Gs, disps, intrinsics, ii, jj, fixedp=2)
217
+
218
+ coords1, valid_mask = pops.projective_transform(Gs, disps, intrinsics, ii, jj)
219
+ residual = (target - coords1)
220
+
221
+ Gs_list.append(Gs)
222
+ disp_list.append(upsample_disp(disps, upmask))
223
+ residual_list.append(valid_mask * residual)
224
+
225
+
226
+ return Gs_list, disp_list, residual_list
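Spelled out, each of the num_steps iterations in forward is one round of flow revision followed by two bundle-adjustment solves (the notation is mine, chosen to mirror the variable names in the loop):

    \mathbf{p}^{(k)} = \Pi\big(G^{(k)}, d^{(k)}\big), \qquad
    \mathbf{f}^{(k)} = \mathbf{p}^{(k)} - \mathbf{p}_0, \qquad
    \mathbf{r}^{(k)} = \mathbf{t}^{(k)} - \mathbf{p}^{(k)}

    (\Delta, w, \eta) = \mathrm{Update}\big(\mathrm{corr}(\mathbf{p}^{(k)}), \mathbf{f}^{(k)}, \mathbf{r}^{(k)}\big), \qquad
    \mathbf{t}^{(k+1)} = \mathbf{p}^{(k)} + \Delta, \qquad
    \big(G^{(k+1)}, d^{(k+1)}\big) = \mathrm{BA}\big(\mathbf{t}^{(k+1)}, w, \eta, G^{(k)}, d^{(k)}\big)

where \Pi is projective_transform, \mathbf{t} is the flow target, and the entry appended to residual_list is \mathbf{t}^{(k+1)} - \mathbf{p}^{(k+1)} masked by the validity map.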
thirdparty/DROID-SLAM/droid_slam/factor_graph.py ADDED
@@ -0,0 +1,397 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ import matplotlib.pyplot as plt
6
+ from lietorch import SE3
7
+ from modules.corr import CorrBlock, AltCorrBlock
8
+ import geom.projective_ops as pops
9
+ from glob import glob
10
+
11
+ class FactorGraph:
12
+ def __init__(self, video, update_op, device="cuda:0", corr_impl="volume", max_factors=-1, upsample=False):
13
+ self.video = video
14
+ self.update_op = update_op
15
+ self.device = device
16
+ self.max_factors = max_factors
17
+ self.corr_impl = corr_impl
18
+ self.upsample = upsample
19
+
20
+ # operator at 1/8 resolution
21
+ self.ht = ht = video.ht // 8
22
+ self.wd = wd = video.wd // 8
23
+
24
+ self.coords0 = pops.coords_grid(ht, wd, device=device)
25
+ self.ii = torch.as_tensor([], dtype=torch.long, device=device)
26
+ self.jj = torch.as_tensor([], dtype=torch.long, device=device)
27
+ self.age = torch.as_tensor([], dtype=torch.long, device=device)
28
+
29
+ self.corr, self.net, self.inp = None, None, None
30
+ self.damping = 1e-6 * torch.ones_like(self.video.disps)
31
+
32
+ self.target = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
33
+ self.weight = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
34
+
35
+ # inactive factors
36
+ self.ii_inac = torch.as_tensor([], dtype=torch.long, device=device)
37
+ self.jj_inac = torch.as_tensor([], dtype=torch.long, device=device)
38
+ self.ii_bad = torch.as_tensor([], dtype=torch.long, device=device)
39
+ self.jj_bad = torch.as_tensor([], dtype=torch.long, device=device)
40
+
41
+ self.target_inac = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
42
+ self.weight_inac = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
43
+
44
+ def __filter_repeated_edges(self, ii, jj):
45
+ """ remove duplicate edges """
46
+
47
+ keep = torch.zeros(ii.shape[0], dtype=torch.bool, device=ii.device)
48
+ eset = set(
49
+ [(i.item(), j.item()) for i, j in zip(self.ii, self.jj)] +
50
+ [(i.item(), j.item()) for i, j in zip(self.ii_inac, self.jj_inac)])
51
+
52
+ for k, (i, j) in enumerate(zip(ii, jj)):
53
+ keep[k] = (i.item(), j.item()) not in eset
54
+
55
+ return ii[keep], jj[keep]
56
+
57
+ def print_edges(self):
58
+ ii = self.ii.cpu().numpy()
59
+ jj = self.jj.cpu().numpy()
60
+
61
+ ix = np.argsort(ii)
62
+ ii = ii[ix]
63
+ jj = jj[ix]
64
+
65
+ w = torch.mean(self.weight, dim=[0,2,3,4]).cpu().numpy()
66
+ w = w[ix]
67
+ for e in zip(ii, jj, w):
68
+ print(e)
69
+ print()
70
+
71
+ def filter_edges(self):
72
+ """ remove bad edges """
73
+ conf = torch.mean(self.weight, dim=[0,2,3,4])
74
+ mask = (torch.abs(self.ii-self.jj) > 2) & (conf < 0.001)
75
+
76
+ self.ii_bad = torch.cat([self.ii_bad, self.ii[mask]])
77
+ self.jj_bad = torch.cat([self.jj_bad, self.jj[mask]])
78
+ self.rm_factors(mask, store=False)
79
+
80
+ def clear_edges(self):
81
+ self.rm_factors(self.ii >= 0)
82
+ self.net = None
83
+ self.inp = None
84
+
85
+ @torch.cuda.amp.autocast(enabled=True)
86
+ def add_factors(self, ii, jj, remove=False):
87
+ """ add edges to factor graph """
88
+
89
+ if not isinstance(ii, torch.Tensor):
90
+ ii = torch.as_tensor(ii, dtype=torch.long, device=self.device)
91
+
92
+ if not isinstance(jj, torch.Tensor):
93
+ jj = torch.as_tensor(jj, dtype=torch.long, device=self.device)
94
+
95
+ # remove duplicate edges
96
+ ii, jj = self.__filter_repeated_edges(ii, jj)
97
+
98
+
99
+ if ii.shape[0] == 0:
100
+ return
101
+
102
+ # place limit on number of factors
103
+ if self.max_factors > 0 and self.ii.shape[0] + ii.shape[0] > self.max_factors \
104
+ and self.corr is not None and remove:
105
+
106
+ ix = torch.arange(len(self.age))[torch.argsort(self.age).cpu()]
107
+ self.rm_factors(ix >= self.max_factors - ii.shape[0], store=True)
108
+
109
+ net = self.video.nets[ii].to(self.device).unsqueeze(0)
110
+
111
+ # correlation volume for new edges
112
+ if self.corr_impl == "volume":
113
+ c = (ii == jj).long()
114
+ fmap1 = self.video.fmaps[ii,0].to(self.device).unsqueeze(0)
115
+ fmap2 = self.video.fmaps[jj,c].to(self.device).unsqueeze(0)
116
+ corr = CorrBlock(fmap1, fmap2)
117
+ self.corr = corr if self.corr is None else self.corr.cat(corr)
118
+
119
+ inp = self.video.inps[ii].to(self.device).unsqueeze(0)
120
+ self.inp = inp if self.inp is None else torch.cat([self.inp, inp], 1)
121
+
122
+ with torch.cuda.amp.autocast(enabled=False):
123
+ target, _ = self.video.reproject(ii, jj)
124
+ weight = torch.zeros_like(target)
125
+
126
+ self.ii = torch.cat([self.ii, ii], 0)
127
+ self.jj = torch.cat([self.jj, jj], 0)
128
+ self.age = torch.cat([self.age, torch.zeros_like(ii)], 0)
129
+
130
+ # reprojection factors
131
+ self.net = net if self.net is None else torch.cat([self.net, net], 1)
132
+
133
+ self.target = torch.cat([self.target, target], 1)
134
+ self.weight = torch.cat([self.weight, weight], 1)
135
+
136
+ @torch.cuda.amp.autocast(enabled=True)
137
+ def rm_factors(self, mask, store=False):
138
+ """ drop edges from factor graph """
139
+
140
+ # store estimated factors
141
+ if store:
142
+ self.ii_inac = torch.cat([self.ii_inac, self.ii[mask]], 0)
143
+ self.jj_inac = torch.cat([self.jj_inac, self.jj[mask]], 0)
144
+ self.target_inac = torch.cat([self.target_inac, self.target[:,mask]], 1)
145
+ self.weight_inac = torch.cat([self.weight_inac, self.weight[:,mask]], 1)
146
+
147
+ self.ii = self.ii[~mask]
148
+ self.jj = self.jj[~mask]
149
+ self.age = self.age[~mask]
150
+
151
+ if self.corr_impl == "volume":
152
+ self.corr = self.corr[~mask]
153
+
154
+ if self.net is not None:
155
+ self.net = self.net[:,~mask]
156
+
157
+ if self.inp is not None:
158
+ self.inp = self.inp[:,~mask]
159
+
160
+ self.target = self.target[:,~mask]
161
+ self.weight = self.weight[:,~mask]
162
+
163
+
164
+ @torch.cuda.amp.autocast(enabled=True)
165
+ def rm_keyframe(self, ix):
166
+ """ drop the keyframe at index ix and remap its edges """
167
+
168
+
169
+ with self.video.get_lock():
170
+ self.video.images[ix] = self.video.images[ix+1]
171
+ self.video.poses[ix] = self.video.poses[ix+1]
172
+ self.video.disps[ix] = self.video.disps[ix+1]
173
+ self.video.disps_sens[ix] = self.video.disps_sens[ix+1]
174
+ self.video.intrinsics[ix] = self.video.intrinsics[ix+1]
175
+
176
+ self.video.nets[ix] = self.video.nets[ix+1]
177
+ self.video.inps[ix] = self.video.inps[ix+1]
178
+ self.video.fmaps[ix] = self.video.fmaps[ix+1]
179
+ self.video.tstamp[ix] = self.video.tstamp[ix+1]
180
+ self.video.masks[ix] = self.video.masks[ix+1]
181
+
182
+ m = (self.ii_inac == ix) | (self.jj_inac == ix)
183
+ self.ii_inac[self.ii_inac >= ix] -= 1
184
+ self.jj_inac[self.jj_inac >= ix] -= 1
185
+
186
+ if torch.any(m):
187
+ self.ii_inac = self.ii_inac[~m]
188
+ self.jj_inac = self.jj_inac[~m]
189
+ self.target_inac = self.target_inac[:,~m]
190
+ self.weight_inac = self.weight_inac[:,~m]
191
+
192
+ m = (self.ii == ix) | (self.jj == ix)
193
+
194
+ self.ii[self.ii >= ix] -= 1
195
+ self.jj[self.jj >= ix] -= 1
196
+ self.rm_factors(m, store=False)
197
+
198
+
199
+ @torch.cuda.amp.autocast(enabled=True)
200
+ def update(self, t0=None, t1=None, itrs=3, use_inactive=False, EP=1e-7, motion_only=False):
201
+ """ run update operator on factor graph """
202
+
203
+ # motion features
204
+ with torch.cuda.amp.autocast(enabled=False):
205
+ coords1, mask = self.video.reproject(self.ii, self.jj)
206
+ motn = torch.cat([coords1 - self.coords0, self.target - coords1], dim=-1)
207
+ motn = motn.permute(0,1,4,2,3).clamp(-64.0, 64.0)
208
+
209
+ # correlation features
210
+ corr = self.corr(coords1)
211
+ self.net, delta, weight, damping, upmask = \
212
+ self.update_op(self.net, self.inp, corr, motn, self.ii, self.jj)
213
+
214
+ ##### save confidence weight for vis #####
215
+ # for k in range(len(self.ii)):
216
+ # w = weight[:, k].detach().cpu().numpy()
217
+ # idx_i = self.ii[k]
218
+ # idx_j = self.jj[k]
219
+ # np.save(f'pred_conf/{idx_i:04d}_{idx_j:04d}.npy', w)
220
+ #############################################
221
+
222
+ # Shapes:
223
+ # weight: [1, k, h//8, w//8, 2]
224
+ # self.ii: [k]; self.jj: [k]
225
+ msk = self.video.masks[self.ii] > 0
226
+ weight[:,msk] = 0.0
227
+
228
+ if t0 is None:
229
+ t0 = max(1, self.ii.min().item()+1)
230
+
231
+ with torch.cuda.amp.autocast(enabled=False):
232
+ self.target = coords1 + delta.to(dtype=torch.float)
233
+ self.weight = weight.to(dtype=torch.float)
234
+
235
+ ht, wd = self.coords0.shape[0:2]
236
+ self.damping[torch.unique(self.ii)] = damping
237
+
238
+ if use_inactive:
239
+ m = (self.ii_inac >= t0 - 3) & (self.jj_inac >= t0 - 3)
240
+ ii = torch.cat([self.ii_inac[m], self.ii], 0)
241
+ jj = torch.cat([self.jj_inac[m], self.jj], 0)
242
+ target = torch.cat([self.target_inac[:,m], self.target], 1)
243
+ weight = torch.cat([self.weight_inac[:,m], self.weight], 1)
244
+
245
+ else:
246
+ ii, jj, target, weight = self.ii, self.jj, self.target, self.weight
247
+
248
+
249
+ damping = .2 * self.damping[torch.unique(ii)].contiguous() + EP
250
+
251
+ target = target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
252
+ weight = weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
253
+
254
+ # dense bundle adjustment
255
+ self.video.ba(target, weight, damping, ii, jj, t0, t1,
256
+ itrs=itrs, lm=1e-4, ep=0.1, motion_only=motion_only)
257
+
258
+ if self.upsample:
259
+ self.video.upsample(torch.unique(self.ii), upmask)
260
+
261
+ self.age += 1
262
+
263
+
264
+ @torch.cuda.amp.autocast(enabled=False)
265
+ def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, steps=8):
266
+ """ run update operator on factor graph - reduced memory implementation """
267
+
268
+ # alternate corr implementation
269
+ t = self.video.counter.value
270
+
271
+ num, rig, ch, ht, wd = self.video.fmaps.shape
272
+ corr_op = AltCorrBlock(self.video.fmaps.view(1, num*rig, ch, ht, wd))
273
+
274
+ print("Global BA Iteration with {} steps".format(steps))
275
+ for step in range(steps):
276
+ # print("Global BA Iteration #{}".format(step+1))
277
+ with torch.cuda.amp.autocast(enabled=False):
278
+ coords1, mask = self.video.reproject(self.ii, self.jj)
279
+ motn = torch.cat([coords1 - self.coords0, self.target - coords1], dim=-1)
280
+ motn = motn.permute(0,1,4,2,3).clamp(-64.0, 64.0)
281
+
282
+ s = 8
283
+ for i in range(0, self.jj.max()+1, s):
284
+ v = (self.ii >= i) & (self.ii < i + s)
285
+ iis = self.ii[v]
286
+ jjs = self.jj[v]
287
+
288
+ ht, wd = self.coords0.shape[0:2]
289
+ corr1 = corr_op(coords1[:,v], rig * iis, rig * jjs + (iis == jjs).long())
290
+
291
+ with torch.cuda.amp.autocast(enabled=True):
292
+
293
+ net, delta, weight, damping, upmask = \
294
+ self.update_op(self.net[:,v], self.video.inps[None,iis], corr1, motn[:,v], iis, jjs)
295
+
296
+ if self.upsample:
297
+ self.video.upsample(torch.unique(iis), upmask)
298
+
299
+ # Shapes:
300
+ # weight: [1, k, h//8, w//8, 2]
301
+ # self.ii: [k]; self.jj: [k]
302
+ msk = self.video.masks[iis] > 0
303
+ weight[:,msk] = 0.0
304
+
305
+ self.net[:,v] = net
306
+ self.target[:,v] = coords1[:,v] + delta.float()
307
+ self.weight[:,v] = weight.float()
308
+ self.damping[torch.unique(iis)] = damping
309
+
310
+ damping = .2 * self.damping[torch.unique(self.ii)].contiguous() + EP
311
+ target = self.target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
312
+ weight = self.weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
313
+
314
+ # dense bundle adjustment
315
+ self.video.ba(target, weight, damping, self.ii, self.jj, 1, t,
316
+ itrs=itrs, lm=1e-5, ep=1e-2, motion_only=False)
317
+
318
+ self.video.dirty[:t] = True
319
+
320
+ def add_neighborhood_factors(self, t0, t1, r=3):
321
+ """ add edges between neighboring frames within radius r """
322
+
323
+ ii, jj = torch.meshgrid(torch.arange(t0,t1), torch.arange(t0,t1), indexing='ij')
324
+ ii = ii.reshape(-1).to(dtype=torch.long, device=self.device)
325
+ jj = jj.reshape(-1).to(dtype=torch.long, device=self.device)
326
+
327
+ c = 1 if self.video.stereo else 0
328
+
329
+ keep = ((ii - jj).abs() > c) & ((ii - jj).abs() <= r)
330
+ self.add_factors(ii[keep], jj[keep])
331
+
332
+
333
+ def add_proximity_factors(self, t0=0, t1=0, rad=2, nms=2, beta=0.25, thresh=16.0, remove=False):
334
+ """ add edges to the factor graph based on distance """
335
+
336
+ t = self.video.counter.value
337
+ ix = torch.arange(t0, t)
338
+ jx = torch.arange(t1, t)
339
+
340
+ ii, jj = torch.meshgrid(ix, jx, indexing='ij')
341
+ ii = ii.reshape(-1)
342
+ jj = jj.reshape(-1)
343
+
344
+ d = self.video.distance(ii, jj, beta=beta)
345
+ d[ii - rad < jj] = np.inf
346
+ d[d > 100] = np.inf
347
+
348
+ ii1 = torch.cat([self.ii, self.ii_bad, self.ii_inac], 0)
349
+ jj1 = torch.cat([self.jj, self.jj_bad, self.jj_inac], 0)
350
+ for i, j in zip(ii1.cpu().numpy(), jj1.cpu().numpy()):
351
+ for di in range(-nms, nms+1):
352
+ for dj in range(-nms, nms+1):
353
+ if abs(di) + abs(dj) <= max(min(abs(i-j)-2, nms), 0):
354
+ i1 = i + di
355
+ j1 = j + dj
356
+
357
+ if (t0 <= i1 < t) and (t1 <= j1 < t):
358
+ d[(i1-t0)*(t-t1) + (j1-t1)] = np.inf
359
+
360
+
361
+ es = []
362
+ for i in range(t0, t):
363
+ if self.video.stereo:
364
+ es.append((i, i))
365
+ d[(i-t0)*(t-t1) + (i-t1)] = np.inf
366
+
367
+ for j in range(max(i-rad-1,0), i):
368
+ es.append((i,j))
369
+ es.append((j,i))
370
+ d[(i-t0)*(t-t1) + (j-t1)] = np.inf
371
+
372
+ ix = torch.argsort(d)
373
+ for k in ix:
374
+ if d[k].item() > thresh:
375
+ continue
376
+
377
+ if len(es) > self.max_factors:
378
+ break
379
+
380
+ i = ii[k]
381
+ j = jj[k]
382
+
383
+ # bidirectional
384
+ es.append((i, j))
385
+ es.append((j, i))
386
+
387
+ for di in range(-nms, nms+1):
388
+ for dj in range(-nms, nms+1):
389
+ if abs(di) + abs(dj) <= max(min(abs(i-j)-2, nms), 0):
390
+ i1 = i + di
391
+ j1 = j + dj
392
+
393
+ if (t0 <= i1 < t) and (t1 <= j1 < t):
394
+ d[(i1-t0)*(t-t1) + (j1-t1)] = np.inf
395
+
396
+ ii, jj = torch.as_tensor(es, device=self.device).unbind(dim=-1)
397
+ self.add_factors(ii, jj, remove)
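One indexing detail that is easy to miss when reading add_proximity_factors: d is the flattened (t - t_0) x (t - t_1) distance matrix over candidate pairs, so suppressing the neighbourhood of an accepted or existing edge (i, j) writes

    d\big[(i_1 - t_0)(t - t_1) + (j_1 - t_1)\big] = \infty

for every (i_1, j_1) within the nms radius, which is the expression appearing in both suppression loops above.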
thirdparty/DROID-SLAM/droid_slam/geom/__init__.py ADDED
File without changes
thirdparty/DROID-SLAM/droid_slam/geom/ba.py ADDED
@@ -0,0 +1,158 @@
1
+ import lietorch
2
+ import torch
3
+ import torch.nn.functional as F
4
+
5
+ from .chol import block_solve, schur_solve
6
+ import geom.projective_ops as pops
7
+
8
+ from torch_scatter import scatter_sum
9
+
10
+
11
+ # utility functions for scattering ops
12
+ def safe_scatter_add_mat(A, ii, jj, n, m):
13
+ v = (ii >= 0) & (jj >= 0) & (ii < n) & (jj < m)
14
+ return scatter_sum(A[:,v], ii[v]*m + jj[v], dim=1, dim_size=n*m)
15
+
16
+ def safe_scatter_add_vec(b, ii, n):
17
+ v = (ii >= 0) & (ii < n)
18
+ return scatter_sum(b[:,v], ii[v], dim=1, dim_size=n)
19
+
20
+ # apply retraction operator to inv-depth maps
21
+ def disp_retr(disps, dz, ii):
22
+ ii = ii.to(device=dz.device)
23
+ return disps + scatter_sum(dz, ii, dim=1, dim_size=disps.shape[1])
24
+
25
+ # apply retraction operator to poses
26
+ def pose_retr(poses, dx, ii):
27
+ ii = ii.to(device=dx.device)
28
+ return poses.retr(scatter_sum(dx, ii, dim=1, dim_size=poses.shape[1]))
29
+
30
+
31
+ def BA(target, weight, eta, poses, disps, intrinsics, ii, jj, fixedp=1, rig=1):
32
+ """ Full Bundle Adjustment """
33
+
34
+ B, P, ht, wd = disps.shape
35
+ N = ii.shape[0]
36
+ D = poses.manifold_dim
37
+
38
+ ### 1: compute jacobians and residuals ###
39
+ coords, valid, (Ji, Jj, Jz) = pops.projective_transform(
40
+ poses, disps, intrinsics, ii, jj, jacobian=True)
41
+
42
+ r = (target - coords).view(B, N, -1, 1)
43
+ w = .001 * (valid * weight).view(B, N, -1, 1)
44
+
45
+ ### 2: construct linear system ###
46
+ Ji = Ji.reshape(B, N, -1, D)
47
+ Jj = Jj.reshape(B, N, -1, D)
48
+ wJiT = (w * Ji).transpose(2,3)
49
+ wJjT = (w * Jj).transpose(2,3)
50
+
51
+ Jz = Jz.reshape(B, N, ht*wd, -1)
52
+
53
+ Hii = torch.matmul(wJiT, Ji)
54
+ Hij = torch.matmul(wJiT, Jj)
55
+ Hji = torch.matmul(wJjT, Ji)
56
+ Hjj = torch.matmul(wJjT, Jj)
57
+
58
+ vi = torch.matmul(wJiT, r).squeeze(-1)
59
+ vj = torch.matmul(wJjT, r).squeeze(-1)
60
+
61
+ Ei = (wJiT.view(B,N,D,ht*wd,-1) * Jz[:,:,None]).sum(dim=-1)
62
+ Ej = (wJjT.view(B,N,D,ht*wd,-1) * Jz[:,:,None]).sum(dim=-1)
63
+
64
+ w = w.view(B, N, ht*wd, -1)
65
+ r = r.view(B, N, ht*wd, -1)
66
+ wk = torch.sum(w*r*Jz, dim=-1)
67
+ Ck = torch.sum(w*Jz*Jz, dim=-1)
68
+
69
+ kx, kk = torch.unique(ii, return_inverse=True)
70
+ M = kx.shape[0]
71
+
72
+ # only optimize keyframe poses
73
+ P = P // rig - fixedp
74
+ ii = ii // rig - fixedp
75
+ jj = jj // rig - fixedp
76
+
77
+ H = safe_scatter_add_mat(Hii, ii, ii, P, P) + \
78
+ safe_scatter_add_mat(Hij, ii, jj, P, P) + \
79
+ safe_scatter_add_mat(Hji, jj, ii, P, P) + \
80
+ safe_scatter_add_mat(Hjj, jj, jj, P, P)
81
+
82
+ E = safe_scatter_add_mat(Ei, ii, kk, P, M) + \
83
+ safe_scatter_add_mat(Ej, jj, kk, P, M)
84
+
85
+ v = safe_scatter_add_vec(vi, ii, P) + \
86
+ safe_scatter_add_vec(vj, jj, P)
87
+
88
+ C = safe_scatter_add_vec(Ck, kk, M)
89
+ w = safe_scatter_add_vec(wk, kk, M)
90
+
91
+ C = C + eta.view(*C.shape) + 1e-7
92
+
93
+ H = H.view(B, P, P, D, D)
94
+ E = E.view(B, P, M, D, ht*wd)
95
+
96
+ ### 3: solve the system ###
97
+ dx, dz = schur_solve(H, E, C, v, w)
98
+
99
+ ### 4: apply retraction ###
100
+ poses = pose_retr(poses, dx, torch.arange(P) + fixedp)
101
+ disps = disp_retr(disps, dz.view(B,-1,ht,wd), kx)
102
+
103
+ disps = torch.where(disps > 10, torch.zeros_like(disps), disps)
104
+ disps = disps.clamp(min=0.0)
105
+
106
+ return poses, disps
107
+
108
+
109
+ def MoBA(target, weight, eta, poses, disps, intrinsics, ii, jj, fixedp=1, rig=1):
110
+ """ Motion only bundle adjustment """
111
+
112
+ B, P, ht, wd = disps.shape
113
+ N = ii.shape[0]
114
+ D = poses.manifold_dim
115
+
116
+ ### 1: compute jacobians and residuals ###
117
+ coords, valid, (Ji, Jj, Jz) = pops.projective_transform(
118
+ poses, disps, intrinsics, ii, jj, jacobian=True)
119
+
120
+ r = (target - coords).view(B, N, -1, 1)
121
+ w = .001 * (valid * weight).view(B, N, -1, 1)
122
+
123
+ ### 2: construct linear system ###
124
+ Ji = Ji.reshape(B, N, -1, D)
125
+ Jj = Jj.reshape(B, N, -1, D)
126
+ wJiT = (w * Ji).transpose(2,3)
127
+ wJjT = (w * Jj).transpose(2,3)
128
+
129
+ Hii = torch.matmul(wJiT, Ji)
130
+ Hij = torch.matmul(wJiT, Jj)
131
+ Hji = torch.matmul(wJjT, Ji)
132
+ Hjj = torch.matmul(wJjT, Jj)
133
+
134
+ vi = torch.matmul(wJiT, r).squeeze(-1)
135
+ vj = torch.matmul(wJjT, r).squeeze(-1)
136
+
137
+ # only optimize keyframe poses
138
+ P = P // rig - fixedp
139
+ ii = ii // rig - fixedp
140
+ jj = jj // rig - fixedp
141
+
142
+ H = safe_scatter_add_mat(Hii, ii, ii, P, P) + \
143
+ safe_scatter_add_mat(Hij, ii, jj, P, P) + \
144
+ safe_scatter_add_mat(Hji, jj, ii, P, P) + \
145
+ safe_scatter_add_mat(Hjj, jj, jj, P, P)
146
+
147
+ v = safe_scatter_add_vec(vi, ii, P) + \
148
+ safe_scatter_add_vec(vj, jj, P)
149
+
150
+ H = H.view(B, P, P, D, D)
151
+
152
+ ### 3: solve the system ###
153
+ dx = block_solve(H, v)
154
+
155
+ ### 4: apply retraction ###
156
+ poses = pose_retr(poses, dx, torch.arange(P) + fixedp)
157
+ return poses
158
+
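For orientation, the system assembled above has the standard two-block bundle-adjustment structure, with H gathering the pose-pose blocks Hii, Hij, Hji, Hjj via the scatter ops, E gathering the pose-depth couplings Ei, Ej, C the pixel-wise diagonal term Ck plus the eta damping, and v, w the right-hand sides (this is the textbook form restated in the code's variable names, not a quotation from the paper):

    \begin{bmatrix} H & E \\ E^{\top} & C \end{bmatrix}
    \begin{bmatrix} \Delta\xi \\ \Delta z \end{bmatrix}
    =
    \begin{bmatrix} v \\ w \end{bmatrix}

Because C is diagonal over pixels, the depth block can be eliminated cheaply; that elimination is exactly what schur_solve in chol.py (the next file) performs.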
thirdparty/DROID-SLAM/droid_slam/geom/chol.py ADDED
@@ -0,0 +1,73 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import geom.projective_ops as pops
4
+
5
+ class CholeskySolver(torch.autograd.Function):
6
+ @staticmethod
7
+ def forward(ctx, H, b):
8
+ # don't crash training if cholesky decomp fails
9
+ try:
10
+ U = torch.linalg.cholesky(H)
11
+ xs = torch.cholesky_solve(b, U)
12
+ ctx.save_for_backward(U, xs)
13
+ ctx.failed = False
14
+ except Exception as e:
15
+ print(e)
16
+ ctx.failed = True
17
+ xs = torch.zeros_like(b)
18
+
19
+ return xs
20
+
21
+ @staticmethod
22
+ def backward(ctx, grad_x):
23
+ if ctx.failed:
24
+ return None, None
25
+
26
+ U, xs = ctx.saved_tensors
27
+ dz = torch.cholesky_solve(grad_x, U)
28
+ dH = -torch.matmul(xs, dz.transpose(-1,-2))
29
+
30
+ return dH, dz
31
+
32
+ def block_solve(H, b, ep=0.1, lm=0.0001):
33
+ """ solve normal equations """
34
+ B, N, _, D, _ = H.shape
35
+ I = torch.eye(D).to(H.device)
36
+ H = H + (ep + lm*H) * I
37
+
38
+ H = H.permute(0,1,3,2,4)
39
+ H = H.reshape(B, N*D, N*D)
40
+ b = b.reshape(B, N*D, 1)
41
+
42
+ x = CholeskySolver.apply(H,b)
43
+ return x.reshape(B, N, D)
44
+
45
+
46
+ def schur_solve(H, E, C, v, w, ep=0.1, lm=0.0001, sless=False):
47
+ """ solve using Schur complement """
48
+
49
+ B, P, M, D, HW = E.shape
50
+ H = H.permute(0,1,3,2,4).reshape(B, P*D, P*D)
51
+ E = E.permute(0,1,3,2,4).reshape(B, P*D, M*HW)
52
+ Q = (1.0 / C).view(B, M*HW, 1)
53
+
54
+ # damping
55
+ I = torch.eye(P*D).to(H.device)
56
+ H = H + (ep + lm*H) * I
57
+
58
+ v = v.reshape(B, P*D, 1)
59
+ w = w.reshape(B, M*HW, 1)
60
+
61
+ Et = E.transpose(1,2)
62
+ S = H - torch.matmul(E, Q*Et)
63
+ v = v - torch.matmul(E, Q*w)
64
+
65
+ dx = CholeskySolver.apply(S, v)
66
+ if sless:
67
+ return dx.reshape(B, P, D)
68
+
69
+ dz = Q * (w - Et @ dx)
70
+ dx = dx.reshape(B, P, D)
71
+ dz = dz.reshape(B, M, HW)
72
+
73
+ return dx, dz
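The elimination in schur_solve mirrors the algebra line for line (Q and S are the names used above; epsilon and lambda are the ep and lm damping arguments):

    Q = C^{-1}, \qquad
    S = H - E\,Q\,E^{\top}, \qquad
    S\,\Delta\xi = v - E\,Q\,w, \qquad
    \Delta z = Q\,\big(w - E^{\top}\Delta\xi\big)

with the damping H \leftarrow H + (\epsilon + \lambda H)\,I applied first and the reduced pose system handed to the CholeskySolver defined at the top of the file.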
thirdparty/DROID-SLAM/droid_slam/geom/graph_utils.py ADDED
@@ -0,0 +1,113 @@
1
+
2
+ import torch
3
+ import numpy as np
4
+ from collections import OrderedDict
5
+
6
+ import lietorch
7
+ from data_readers.rgbd_utils import compute_distance_matrix_flow, compute_distance_matrix_flow2
8
+
9
+
10
+ def graph_to_edge_list(graph):
11
+ ii, jj, kk = [], [], []
12
+ for s, u in enumerate(graph):
13
+ for v in graph[u]:
14
+ ii.append(u)
15
+ jj.append(v)
16
+ kk.append(s)
17
+
18
+ ii = torch.as_tensor(ii)
19
+ jj = torch.as_tensor(jj)
20
+ kk = torch.as_tensor(kk)
21
+ return ii, jj, kk
22
+
23
+ def keyframe_indicies(graph):
24
+ return torch.as_tensor([u for u in graph])
25
+
26
+ def meshgrid(m, n, device='cuda'):
27
+ ii, jj = torch.meshgrid(torch.arange(m), torch.arange(n), indexing='ij')
28
+ return ii.reshape(-1).to(device), jj.reshape(-1).to(device)
29
+
30
+ def neighbourhood_graph(n, r):
31
+ ii, jj = meshgrid(n, n)
32
+ d = (ii - jj).abs()
33
+ keep = (d >= 1) & (d <= r)
34
+ return ii[keep], jj[keep]
35
+
36
+
37
+ def build_frame_graph(poses, disps, intrinsics, num=16, thresh=24.0, r=2):
38
+ """ construct a frame graph between co-visible frames """
39
+ N = poses.shape[1]
40
+ poses = poses[0].cpu().numpy()
41
+ disps = disps[0][:,3::8,3::8].cpu().numpy()
42
+ intrinsics = intrinsics[0].cpu().numpy() / 8.0
43
+ d = compute_distance_matrix_flow(poses, disps, intrinsics)
44
+
45
+ count = 0
46
+ graph = OrderedDict()
47
+
48
+ for i in range(N):
49
+ graph[i] = []
50
+ d[i,i] = np.inf
51
+ for j in range(i-r, i+r+1):
52
+ if 0 <= j < N and i != j:
53
+ graph[i].append(j)
54
+ d[i,j] = np.inf
55
+ count += 1
56
+
57
+ while count < num:
58
+ ix = np.argmin(d)
59
+ i, j = ix // N, ix % N
60
+
61
+ if d[i,j] < thresh:
62
+ graph[i].append(j)
63
+ d[i,j] = np.inf
64
+ count += 1
65
+ else:
66
+ break
67
+
68
+ return graph
69
+
70
+
71
+
72
+ def build_frame_graph_v2(poses, disps, intrinsics, num=16, thresh=24.0, r=2):
73
+ """ construct a frame graph between co-visible frames """
74
+ N = poses.shape[1]
75
+ # poses = poses[0].cpu().numpy()
76
+ # disps = disps[0].cpu().numpy()
77
+ # intrinsics = intrinsics[0].cpu().numpy()
78
+ d = compute_distance_matrix_flow2(poses, disps, intrinsics)
79
+
80
+ # import matplotlib.pyplot as plt
81
+ # plt.imshow(d)
82
+ # plt.show()
83
+
84
+ count = 0
85
+ graph = OrderedDict()
86
+
87
+ for i in range(N):
88
+ graph[i] = []
89
+ d[i,i] = np.inf
90
+ for j in range(i-r, i+r+1):
91
+ if 0 <= j < N and i != j:
92
+ graph[i].append(j)
93
+ d[i,j] = np.inf
94
+ count += 1
95
+
96
+ while 1:
97
+ ix = np.argmin(d)
98
+ i, j = ix // N, ix % N
99
+
100
+ if d[i,j] < thresh:
101
+ graph[i].append(j)
102
+
103
+ for i1 in range(i-1, i+2):
104
+ for j1 in range(j-1, j+2):
105
+ if 0 <= i1 < N and 0 <= j1 < N:
106
+ d[i1, j1] = np.inf
107
+
108
+ count += 1
109
+ else:
110
+ break
111
+
112
+ return graph
113
+
thirdparty/DROID-SLAM/droid_slam/geom/losses.py ADDED
@@ -0,0 +1,118 @@
1
+ from collections import OrderedDict
2
+ import numpy as np
3
+ import torch
4
+ from lietorch import SO3, SE3, Sim3
5
+ from .graph_utils import graph_to_edge_list
6
+ from .projective_ops import projective_transform
7
+
8
+
9
+ def pose_metrics(dE):
10
+ """ Translation/Rotation/Scaling metrics from Sim3 """
11
+ t, q, s = dE.data.split([3, 4, 1], -1)
12
+ ang = SO3(q).log().norm(dim=-1)
13
+
14
+ # convert radians to degrees
15
+ r_err = (180 / np.pi) * ang
16
+ t_err = t.norm(dim=-1)
17
+ s_err = (s - 1.0).abs()
18
+ return r_err, t_err, s_err
19
+
20
+
21
+ def fit_scale(Ps, Gs):
22
+ b = Ps.shape[0]
23
+ t1 = Ps.data[...,:3].detach().reshape(b, -1)
24
+ t2 = Gs.data[...,:3].detach().reshape(b, -1)
25
+
26
+ s = (t1*t2).sum(-1) / ((t2*t2).sum(-1) + 1e-8)
27
+ return s
28
+
29
+
30
+ def geodesic_loss(Ps, Gs, graph, gamma=0.9, do_scale=True):
31
+ """ Loss function for training network """
32
+
33
+ # relative pose
34
+ ii, jj, kk = graph_to_edge_list(graph)
35
+ dP = Ps[:,jj] * Ps[:,ii].inv()
36
+
37
+ n = len(Gs)
38
+ geodesic_loss = 0.0
39
+
40
+ for i in range(n):
41
+ w = gamma ** (n - i - 1)
42
+ dG = Gs[i][:,jj] * Gs[i][:,ii].inv()
43
+
44
+ if do_scale:
45
+ s = fit_scale(dP, dG)
46
+ dG = dG.scale(s[:,None])
47
+
48
+ # pose error
49
+ d = (dG * dP.inv()).log()
50
+
51
+ if isinstance(dG, SE3):
52
+ tau, phi = d.split([3,3], dim=-1)
53
+ geodesic_loss += w * (
54
+ tau.norm(dim=-1).mean() +
55
+ phi.norm(dim=-1).mean())
56
+
57
+ elif isinstance(dG, Sim3):
58
+ tau, phi, sig = d.split([3,3,1], dim=-1)
59
+ geodesic_loss += w * (
60
+ tau.norm(dim=-1).mean() +
61
+ phi.norm(dim=-1).mean() +
62
+ 0.05 * sig.norm(dim=-1).mean())
63
+
64
+ dE = Sim3(dG * dP.inv()).detach()
65
+ r_err, t_err, s_err = pose_metrics(dE)
66
+
67
+ metrics = {
68
+ 'rot_error': r_err.mean().item(),
69
+ 'tr_error': t_err.mean().item(),
70
+ 'bad_rot': (r_err < .1).float().mean().item(),
71
+ 'bad_tr': (t_err < .01).float().mean().item(),
72
+ }
73
+
74
+ return geodesic_loss, metrics
75
+
76
+
77
+ def residual_loss(residuals, gamma=0.9):
78
+ """ loss on system residuals """
79
+ residual_loss = 0.0
80
+ n = len(residuals)
81
+
82
+ for i in range(n):
83
+ w = gamma ** (n - i - 1)
84
+ residual_loss += w * residuals[i].abs().mean()
85
+
86
+ return residual_loss, {'residual': residual_loss.item()}
87
+
88
+
89
+ def flow_loss(Ps, disps, poses_est, disps_est, intrinsics, graph, gamma=0.9):
90
+ """ optical flow loss """
91
+
92
+ N = Ps.shape[1]
93
+ graph = OrderedDict()
94
+ for i in range(N):
95
+ graph[i] = [j for j in range(N) if abs(i-j)==1]
96
+
97
+ ii, jj, kk = graph_to_edge_list(graph)
98
+ coords0, val0 = projective_transform(Ps, disps, intrinsics, ii, jj)
99
+ val0 = val0 * (disps[:,ii] > 0).float().unsqueeze(dim=-1)
100
+
101
+ n = len(poses_est)
102
+ flow_loss = 0.0
103
+
104
+ for i in range(n):
105
+ w = gamma ** (n - i - 1)
106
+ coords1, val1 = projective_transform(poses_est[i], disps_est[i], intrinsics, ii, jj)
107
+
108
+ v = (val0 * val1).squeeze(dim=-1)
109
+ epe = v * (coords1 - coords0).norm(dim=-1)
110
+ flow_loss += w * epe.mean()
111
+
112
+ epe = epe.reshape(-1)[v.reshape(-1) > 0.5]
113
+ metrics = {
114
+ 'f_error': epe.mean().item(),
115
+ '1px': (epe<1.0).float().mean().item(),
116
+ }
117
+
118
+ return flow_loss, metrics
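In symbols, geodesic_loss applies the usual exponentially decayed weighting over the n intermediate pose estimates (notation mine; the per-edge means are written as norms for brevity):

    \mathcal{L}_{\mathrm{geo}} \;=\; \sum_{i=1}^{n} \gamma^{\,n-i}
    \Big( \lVert \tau_i \rVert + \lVert \phi_i \rVert \Big),
    \qquad (\tau_i, \phi_i) = \log\!\big( \hat{G}_i \, \Delta P^{-1} \big)

where \Delta P is the ground-truth relative pose on each graph edge, \hat{G}_i is the i-th estimate (scale-aligned when do_scale is set), and the Sim3 branch adds a small 0.05-weighted scale term; residual_loss and flow_loss reuse the same \gamma^{\,n-i} schedule.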
thirdparty/DROID-SLAM/droid_slam/geom/projective_ops.py ADDED
@@ -0,0 +1,139 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from lietorch import SE3, Sim3
5
+
6
+ MIN_DEPTH = 0.2
7
+
8
+ def extract_intrinsics(intrinsics):
9
+ return intrinsics[...,None,None,:].unbind(dim=-1)
10
+
11
+ def coords_grid(ht, wd, **kwargs):
12
+ y, x = torch.meshgrid(
13
+ torch.arange(ht).to(**kwargs).float(),
14
+ torch.arange(wd).to(**kwargs).float(), indexing='ij')
15
+
16
+ return torch.stack([x, y], dim=-1)
17
+
18
+ def iproj(disps, intrinsics, jacobian=False):
19
+ """ pinhole camera inverse projection """
20
+ ht, wd = disps.shape[2:]
21
+ fx, fy, cx, cy = extract_intrinsics(intrinsics)
22
+
23
+ y, x = torch.meshgrid(
24
+ torch.arange(ht).to(disps.device).float(),
25
+ torch.arange(wd).to(disps.device).float(), indexing='ij')
26
+
27
+ i = torch.ones_like(disps)
28
+ X = (x - cx) / fx
29
+ Y = (y - cy) / fy
30
+ pts = torch.stack([X, Y, i, disps], dim=-1)
31
+
32
+ if jacobian:
33
+ J = torch.zeros_like(pts)
34
+ J[...,-1] = 1.0
35
+ return pts, J
36
+
37
+ return pts, None
38
+
39
+ def proj(Xs, intrinsics, jacobian=False, return_depth=False):
40
+ """ pinhole camera projection """
41
+ fx, fy, cx, cy = extract_intrinsics(intrinsics)
42
+ X, Y, Z, D = Xs.unbind(dim=-1)
43
+
44
+ Z = torch.where(Z < 0.5*MIN_DEPTH, torch.ones_like(Z), Z)
45
+ d = 1.0 / Z
46
+
47
+ x = fx * (X * d) + cx
48
+ y = fy * (Y * d) + cy
49
+ if return_depth:
50
+ coords = torch.stack([x, y, D*d], dim=-1)
51
+ else:
52
+ coords = torch.stack([x, y], dim=-1)
53
+
54
+ if jacobian:
55
+ B, N, H, W = d.shape
56
+ o = torch.zeros_like(d)
57
+ proj_jac = torch.stack([
58
+ fx*d, o, -fx*X*d*d, o,
59
+ o, fy*d, -fy*Y*d*d, o,
60
+ # o, o, -D*d*d, d,
61
+ ], dim=-1).view(B, N, H, W, 2, 4)
62
+
63
+ return coords, proj_jac
64
+
65
+ return coords, None
66
+
67
+ def actp(Gij, X0, jacobian=False):
68
+ """ action on point cloud """
69
+ X1 = Gij[:,:,None,None] * X0
70
+
71
+ if jacobian:
72
+ X, Y, Z, d = X1.unbind(dim=-1)
73
+ o = torch.zeros_like(d)
74
+ B, N, H, W = d.shape
75
+
76
+ if isinstance(Gij, SE3):
77
+ Ja = torch.stack([
78
+ d, o, o, o, Z, -Y,
79
+ o, d, o, -Z, o, X,
80
+ o, o, d, Y, -X, o,
81
+ o, o, o, o, o, o,
82
+ ], dim=-1).view(B, N, H, W, 4, 6)
83
+
84
+ elif isinstance(Gij, Sim3):
85
+ Ja = torch.stack([
86
+ d, o, o, o, Z, -Y, X,
87
+ o, d, o, -Z, o, X, Y,
88
+ o, o, d, Y, -X, o, Z,
89
+ o, o, o, o, o, o, o
90
+ ], dim=-1).view(B, N, H, W, 4, 7)
91
+
92
+ return X1, Ja
93
+
94
+ return X1, None
95
+
96
+ def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, return_depth=False):
97
+ """ map points from ii->jj """
98
+
99
+ # inverse project (pinhole)
100
+ X0, Jz = iproj(depths[:,ii], intrinsics[:,ii], jacobian=jacobian)
101
+
102
+ # transform
103
+ Gij = poses[:,jj] * poses[:,ii].inv()
104
+
105
+ Gij.data[:,ii==jj] = torch.as_tensor([-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device="cuda")
106
+ X1, Ja = actp(Gij, X0, jacobian=jacobian)
107
+
108
+ # project (pinhole)
109
+ x1, Jp = proj(X1, intrinsics[:,jj], jacobian=jacobian, return_depth=return_depth)
110
+
111
+ # exclude points too close to camera
112
+ valid = ((X1[...,2] > MIN_DEPTH) & (X0[...,2] > MIN_DEPTH)).float()
113
+ valid = valid.unsqueeze(-1)
114
+
115
+ if jacobian:
116
+ # Ji transforms according to dual adjoint
117
+ Jj = torch.matmul(Jp, Ja)
118
+ Ji = -Gij[:,:,None,None,None].adjT(Jj)
119
+
120
+ Jz = Gij[:,:,None,None] * Jz
121
+ Jz = torch.matmul(Jp, Jz.unsqueeze(-1))
122
+
123
+ return x1, valid, (Ji, Jj, Jz)
124
+
125
+ return x1, valid
126
+
127
+ def induced_flow(poses, disps, intrinsics, ii, jj):
128
+ """ optical flow induced by camera motion """
129
+
130
+ ht, wd = disps.shape[2:]
131
+ y, x = torch.meshgrid(
132
+ torch.arange(ht).to(disps.device).float(),
133
+ torch.arange(wd).to(disps.device).float(), indexing='ij')
134
+
135
+ coords0 = torch.stack([x, y], dim=-1)
136
+ coords1, valid = projective_transform(poses, disps, intrinsics, ii, jj, False)
137
+
138
+ return coords1[...,:2] - coords0, valid
139
+
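The two primitives above are the standard pinhole model parameterized by inverse depth (f_x, f_y, c_x, c_y as in the code):

    \pi^{-1}(x, y, d) = \Big( \tfrac{x - c_x}{f_x},\; \tfrac{y - c_y}{f_y},\; 1,\; d \Big),
    \qquad
    \pi(X, Y, Z, D) = \Big( f_x \tfrac{X}{Z} + c_x,\; f_y \tfrac{Y}{Z} + c_y \Big)

and projective_transform chains them with the relative pose G_{ij} = G_j G_i^{-1}, i.e. \mathbf{p}_j = \pi\big( G_{ij}\, \pi^{-1}(\mathbf{p}_i, d_i) \big), which is also where the analytic Jacobians J_i, J_j, J_z are taken.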
thirdparty/DROID-SLAM/droid_slam/logger.py ADDED
@@ -0,0 +1,54 @@
1
+
2
+ import torch
3
+ from torch.utils.tensorboard import SummaryWriter
4
+
5
+
6
+ SUM_FREQ = 100
7
+
8
+ class Logger:
9
+ def __init__(self, name, scheduler):
10
+ self.total_steps = 0
11
+ self.running_loss = {}
12
+ self.writer = None
13
+ self.name = name
14
+ self.scheduler = scheduler
15
+
16
+ def _print_training_status(self):
17
+ if self.writer is None:
18
+ self.writer = SummaryWriter('runs/%s' % self.name)
19
+ print([k for k in self.running_loss])
20
+
21
+ lr = self.scheduler.get_lr().pop()
22
+ metrics_data = [self.running_loss[k]/SUM_FREQ for k in self.running_loss.keys()]
23
+ training_str = "[{:6d}, {:10.7f}] ".format(self.total_steps+1, lr)
24
+ metrics_str = ("{:10.4f}, "*len(metrics_data)).format(*metrics_data)
25
+
26
+ # print the training status
27
+ print(training_str + metrics_str)
28
+
29
+ for key in self.running_loss:
30
+ val = self.running_loss[key] / SUM_FREQ
31
+ self.writer.add_scalar(key, val, self.total_steps)
32
+ self.running_loss[key] = 0.0
33
+
34
+ def push(self, metrics):
35
+
36
+ for key in metrics:
37
+ if key not in self.running_loss:
38
+ self.running_loss[key] = 0.0
39
+
40
+ self.running_loss[key] += metrics[key]
41
+
42
+ if self.total_steps % SUM_FREQ == SUM_FREQ-1:
43
+ self._print_training_status()
44
+ self.running_loss = {}
45
+
46
+ self.total_steps += 1
47
+
48
+ def write_dict(self, results):
49
+ for key in results:
50
+ self.writer.add_scalar(key, results[key], self.total_steps)
51
+
52
+ def close(self):
53
+ self.writer.close()
54
+
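Note: a minimal usage sketch of the Logger above, assuming tensorboard is installed and this runs next to the class definition; the tiny optimizer/scheduler pair is a hypothetical stand-in (anything exposing get_lr() works).

import torch

param = torch.nn.Parameter(torch.zeros(1))                       # dummy parameter, just to build a scheduler
optimizer = torch.optim.Adam([param], lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000)

logger = Logger("example_run", scheduler)
for step in range(250):
    logger.push({"loss": 0.5, "flow_error": 1.2})                # averaged and printed every SUM_FREQ steps
logger.close()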
thirdparty/DROID-SLAM/droid_slam/modules/__init__.py ADDED
File without changes
thirdparty/DROID-SLAM/droid_slam/modules/clipping.py ADDED
@@ -0,0 +1,24 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ GRAD_CLIP = .01
6
+
7
+ class GradClip(torch.autograd.Function):
8
+ @staticmethod
9
+ def forward(ctx, x):
10
+ return x
11
+
12
+ @staticmethod
13
+ def backward(ctx, grad_x):
14
+ o = torch.zeros_like(grad_x)
15
+ grad_x = torch.where(grad_x.abs()>GRAD_CLIP, o, grad_x)
16
+ grad_x = torch.where(torch.isnan(grad_x), o, grad_x)
17
+ return grad_x
18
+
19
+ class GradientClip(nn.Module):
20
+ def __init__(self):
21
+ super(GradientClip, self).__init__()
22
+
23
+ def forward(self, x):
24
+ return GradClip.apply(x)
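Note: the forward pass is an identity; only the backward pass changes. A quick check of the behaviour (run alongside the module above): gradient entries whose magnitude exceeds GRAD_CLIP, and NaN entries, are zeroed.

import torch

x = torch.ones(3, requires_grad=True)
y = GradientClip()(x)
y.backward(torch.tensor([0.005, 0.5, float('nan')]))
print(x.grad)   # tensor([0.0050, 0.0000, 0.0000])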
thirdparty/DROID-SLAM/droid_slam/modules/corr.py ADDED
@@ -0,0 +1,140 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ import droid_backends
5
+
6
+ class CorrSampler(torch.autograd.Function):
7
+
8
+ @staticmethod
9
+ def forward(ctx, volume, coords, radius):
10
+ ctx.save_for_backward(volume,coords)
11
+ ctx.radius = radius
12
+ corr, = droid_backends.corr_index_forward(volume, coords, radius)
13
+ return corr
14
+
15
+ @staticmethod
16
+ def backward(ctx, grad_output):
17
+ volume, coords = ctx.saved_tensors
18
+ grad_output = grad_output.contiguous()
19
+ grad_volume, = droid_backends.corr_index_backward(volume, coords, grad_output, ctx.radius)
20
+ return grad_volume, None, None
21
+
22
+
23
+ class CorrBlock:
24
+ def __init__(self, fmap1, fmap2, num_levels=4, radius=3):
25
+ self.num_levels = num_levels
26
+ self.radius = radius
27
+ self.corr_pyramid = []
28
+
29
+ # all pairs correlation
30
+ corr = CorrBlock.corr(fmap1, fmap2)
31
+
32
+ batch, num, h1, w1, h2, w2 = corr.shape
33
+ corr = corr.reshape(batch*num*h1*w1, 1, h2, w2)
34
+
35
+ for i in range(self.num_levels):
36
+ self.corr_pyramid.append(
37
+ corr.view(batch*num, h1, w1, h2//2**i, w2//2**i))
38
+ corr = F.avg_pool2d(corr, 2, stride=2)
39
+
40
+ def __call__(self, coords):
41
+ out_pyramid = []
42
+ batch, num, ht, wd, _ = coords.shape
43
+ coords = coords.permute(0,1,4,2,3)
44
+ coords = coords.contiguous().view(batch*num, 2, ht, wd)
45
+
46
+ for i in range(self.num_levels):
47
+ corr = CorrSampler.apply(self.corr_pyramid[i], coords/2**i, self.radius)
48
+ out_pyramid.append(corr.view(batch, num, -1, ht, wd))
49
+
50
+ return torch.cat(out_pyramid, dim=2)
51
+
52
+ def cat(self, other):
53
+ for i in range(self.num_levels):
54
+ self.corr_pyramid[i] = torch.cat([self.corr_pyramid[i], other.corr_pyramid[i]], 0)
55
+ return self
56
+
57
+ def __getitem__(self, index):
58
+ for i in range(self.num_levels):
59
+ self.corr_pyramid[i] = self.corr_pyramid[i][index]
60
+ return self
61
+
62
+
63
+ @staticmethod
64
+ def corr(fmap1, fmap2):
65
+ """ all-pairs correlation """
66
+ batch, num, dim, ht, wd = fmap1.shape
67
+ fmap1 = fmap1.reshape(batch*num, dim, ht*wd) / 4.0
68
+ fmap2 = fmap2.reshape(batch*num, dim, ht*wd) / 4.0
69
+
70
+ corr = torch.matmul(fmap1.transpose(1,2), fmap2)
71
+ return corr.view(batch, num, ht, wd, ht, wd)
72
+
73
+
74
+ class CorrLayer(torch.autograd.Function):
75
+ @staticmethod
76
+ def forward(ctx, fmap1, fmap2, coords, r):
77
+ ctx.r = r
78
+ ctx.save_for_backward(fmap1, fmap2, coords)
79
+ corr, = droid_backends.altcorr_forward(fmap1, fmap2, coords, ctx.r)
80
+ return corr
81
+
82
+ @staticmethod
83
+ def backward(ctx, grad_corr):
84
+ fmap1, fmap2, coords = ctx.saved_tensors
85
+ grad_corr = grad_corr.contiguous()
86
+ fmap1_grad, fmap2_grad, coords_grad = \
87
+ droid_backends.altcorr_backward(fmap1, fmap2, coords, grad_corr, ctx.r)
88
+ return fmap1_grad, fmap2_grad, coords_grad, None
89
+
90
+
91
+ class AltCorrBlock:
92
+ def __init__(self, fmaps, num_levels=4, radius=3):
93
+ self.num_levels = num_levels
94
+ self.radius = radius
95
+
96
+ B, N, C, H, W = fmaps.shape
97
+ fmaps = fmaps.view(B*N, C, H, W) / 4.0
98
+
99
+ self.pyramid = []
100
+ for i in range(self.num_levels):
101
+ sz = (B, N, H//2**i, W//2**i, C)
102
+ fmap_lvl = fmaps.permute(0, 2, 3, 1).contiguous()
103
+ self.pyramid.append(fmap_lvl.view(*sz))
104
+ fmaps = F.avg_pool2d(fmaps, 2, stride=2)
105
+
106
+ def corr_fn(self, coords, ii, jj):
107
+ B, N, H, W, S, _ = coords.shape
108
+ coords = coords.permute(0, 1, 4, 2, 3, 5)
109
+
110
+ corr_list = []
111
+ for i in range(self.num_levels):
112
+ r = self.radius
113
+ fmap1_i = self.pyramid[0][:, ii]
114
+ fmap2_i = self.pyramid[i][:, jj]
115
+
116
+ coords_i = (coords / 2**i).reshape(B*N, S, H, W, 2).contiguous()
117
+ fmap1_i = fmap1_i.reshape((B*N,) + fmap1_i.shape[2:])
118
+ fmap2_i = fmap2_i.reshape((B*N,) + fmap2_i.shape[2:])
119
+
120
+ corr = CorrLayer.apply(fmap1_i.float(), fmap2_i.float(), coords_i, self.radius)
121
+ corr = corr.view(B, N, S, -1, H, W).permute(0, 1, 3, 4, 5, 2)
122
+ corr_list.append(corr)
123
+
124
+ corr = torch.cat(corr_list, dim=2)
125
+ return corr
126
+
127
+
128
+ def __call__(self, coords, ii, jj):
129
+ squeeze_output = False
130
+ if len(coords.shape) == 5:
131
+ coords = coords.unsqueeze(dim=-2)
132
+ squeeze_output = True
133
+
134
+ corr = self.corr_fn(coords, ii, jj)
135
+
136
+ if squeeze_output:
137
+ corr = corr.squeeze(dim=-1)
138
+
139
+ return corr.contiguous()
140
+
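Note: a shape check for the all-pairs correlation with dummy 1/8-resolution feature maps (sizes are illustrative). Building the volume is plain PyTorch; indexing the pyramid via CorrSampler/CorrLayer needs the compiled droid_backends CUDA extension, which this module also imports at load time.

import torch

fmap1 = torch.randn(1, 2, 128, 30, 40)      # [batch, num_edges, dim, h, w]
fmap2 = torch.randn(1, 2, 128, 30, 40)

volume = CorrBlock.corr(fmap1, fmap2)       # dot products between every pair of pixels
print(volume.shape)                         # torch.Size([1, 2, 30, 40, 30, 40])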
thirdparty/DROID-SLAM/droid_slam/modules/extractor.py ADDED
@@ -0,0 +1,198 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class ResidualBlock(nn.Module):
7
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
8
+ super(ResidualBlock, self).__init__()
9
+
10
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
11
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
12
+ self.relu = nn.ReLU(inplace=True)
13
+
14
+ num_groups = planes // 8
15
+
16
+ if norm_fn == 'group':
17
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
18
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
19
+ if not stride == 1:
20
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
21
+
22
+ elif norm_fn == 'batch':
23
+ self.norm1 = nn.BatchNorm2d(planes)
24
+ self.norm2 = nn.BatchNorm2d(planes)
25
+ if not stride == 1:
26
+ self.norm3 = nn.BatchNorm2d(planes)
27
+
28
+ elif norm_fn == 'instance':
29
+ self.norm1 = nn.InstanceNorm2d(planes)
30
+ self.norm2 = nn.InstanceNorm2d(planes)
31
+ if not stride == 1:
32
+ self.norm3 = nn.InstanceNorm2d(planes)
33
+
34
+ elif norm_fn == 'none':
35
+ self.norm1 = nn.Sequential()
36
+ self.norm2 = nn.Sequential()
37
+ if not stride == 1:
38
+ self.norm3 = nn.Sequential()
39
+
40
+ if stride == 1:
41
+ self.downsample = None
42
+
43
+ else:
44
+ self.downsample = nn.Sequential(
45
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
46
+
47
+ def forward(self, x):
48
+ y = x
49
+ y = self.relu(self.norm1(self.conv1(y)))
50
+ y = self.relu(self.norm2(self.conv2(y)))
51
+
52
+ if self.downsample is not None:
53
+ x = self.downsample(x)
54
+
55
+ return self.relu(x+y)
56
+
57
+
58
+ class BottleneckBlock(nn.Module):
59
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
60
+ super(BottleneckBlock, self).__init__()
61
+
62
+ self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
63
+ self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
64
+ self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
65
+ self.relu = nn.ReLU(inplace=True)
66
+
67
+ num_groups = planes // 8
68
+
69
+ if norm_fn == 'group':
70
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
71
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
72
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
73
+ if not stride == 1:
74
+ self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
75
+
76
+ elif norm_fn == 'batch':
77
+ self.norm1 = nn.BatchNorm2d(planes//4)
78
+ self.norm2 = nn.BatchNorm2d(planes//4)
79
+ self.norm3 = nn.BatchNorm2d(planes)
80
+ if not stride == 1:
81
+ self.norm4 = nn.BatchNorm2d(planes)
82
+
83
+ elif norm_fn == 'instance':
84
+ self.norm1 = nn.InstanceNorm2d(planes//4)
85
+ self.norm2 = nn.InstanceNorm2d(planes//4)
86
+ self.norm3 = nn.InstanceNorm2d(planes)
87
+ if not stride == 1:
88
+ self.norm4 = nn.InstanceNorm2d(planes)
89
+
90
+ elif norm_fn == 'none':
91
+ self.norm1 = nn.Sequential()
92
+ self.norm2 = nn.Sequential()
93
+ self.norm3 = nn.Sequential()
94
+ if not stride == 1:
95
+ self.norm4 = nn.Sequential()
96
+
97
+ if stride == 1:
98
+ self.downsample = None
99
+
100
+ else:
101
+ self.downsample = nn.Sequential(
102
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
103
+
104
+ def forward(self, x):
105
+ y = x
106
+ y = self.relu(self.norm1(self.conv1(y)))
107
+ y = self.relu(self.norm2(self.conv2(y)))
108
+ y = self.relu(self.norm3(self.conv3(y)))
109
+
110
+ if self.downsample is not None:
111
+ x = self.downsample(x)
112
+
113
+ return self.relu(x+y)
114
+
115
+
116
+ DIM=32
117
+
118
+ class BasicEncoder(nn.Module):
119
+ def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False):
120
+ super(BasicEncoder, self).__init__()
121
+ self.norm_fn = norm_fn
122
+ self.multidim = multidim
123
+
124
+ if self.norm_fn == 'group':
125
+ self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM)
126
+
127
+ elif self.norm_fn == 'batch':
128
+ self.norm1 = nn.BatchNorm2d(DIM)
129
+
130
+ elif self.norm_fn == 'instance':
131
+ self.norm1 = nn.InstanceNorm2d(DIM)
132
+
133
+ elif self.norm_fn == 'none':
134
+ self.norm1 = nn.Sequential()
135
+
136
+ self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3)
137
+ self.relu1 = nn.ReLU(inplace=True)
138
+
139
+ self.in_planes = DIM
140
+ self.layer1 = self._make_layer(DIM, stride=1)
141
+ self.layer2 = self._make_layer(2*DIM, stride=2)
142
+ self.layer3 = self._make_layer(4*DIM, stride=2)
143
+
144
+ # output convolution
145
+ self.conv2 = nn.Conv2d(4*DIM, output_dim, kernel_size=1)
146
+
147
+ if self.multidim:
148
+ self.layer4 = self._make_layer(256, stride=2)
149
+ self.layer5 = self._make_layer(512, stride=2)
150
+
151
+ self.in_planes = 256
152
+ self.layer6 = self._make_layer(256, stride=1)
153
+
154
+ self.in_planes = 128
155
+ self.layer7 = self._make_layer(128, stride=1)
156
+
157
+ self.up1 = nn.Conv2d(512, 256, 1)
158
+ self.up2 = nn.Conv2d(256, 128, 1)
159
+ self.conv3 = nn.Conv2d(128, output_dim, kernel_size=1)
160
+
161
+ if dropout > 0:
162
+ self.dropout = nn.Dropout2d(p=dropout)
163
+ else:
164
+ self.dropout = None
165
+
166
+ for m in self.modules():
167
+ if isinstance(m, nn.Conv2d):
168
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
169
+ elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
170
+ if m.weight is not None:
171
+ nn.init.constant_(m.weight, 1)
172
+ if m.bias is not None:
173
+ nn.init.constant_(m.bias, 0)
174
+
175
+ def _make_layer(self, dim, stride=1):
176
+ layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
177
+ layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
178
+ layers = (layer1, layer2)
179
+
180
+ self.in_planes = dim
181
+ return nn.Sequential(*layers)
182
+
183
+ def forward(self, x):
184
+ b, n, c1, h1, w1 = x.shape
185
+ x = x.view(b*n, c1, h1, w1)
186
+
187
+ x = self.conv1(x)
188
+ x = self.norm1(x)
189
+ x = self.relu1(x)
190
+
191
+ x = self.layer1(x)
192
+ x = self.layer2(x)
193
+ x = self.layer3(x)
194
+
195
+ x = self.conv2(x)
196
+
197
+ _, c2, h2, w2 = x.shape
198
+ return x.view(b, n, c2, h2, w2)
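Note: three stride-2 stages take the input down to 1/8 resolution, which is the grid the correlation volume and GRU operate on. A shape check with an illustrative 480x640 input:

import torch

encoder = BasicEncoder(output_dim=128, norm_fn='instance')
frames = torch.randn(1, 2, 3, 480, 640)      # [batch, num_frames, rgb, H, W]
feats = encoder(frames)
print(feats.shape)                           # torch.Size([1, 2, 128, 60, 80])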
thirdparty/DROID-SLAM/droid_slam/modules/gru.py ADDED
@@ -0,0 +1,34 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
+ class ConvGRU(nn.Module):
6
+ def __init__(self, h_planes=128, i_planes=128):
7
+ super(ConvGRU, self).__init__()
8
+ self.do_checkpoint = False
9
+ self.convz = nn.Conv2d(h_planes+i_planes, h_planes, 3, padding=1)
10
+ self.convr = nn.Conv2d(h_planes+i_planes, h_planes, 3, padding=1)
11
+ self.convq = nn.Conv2d(h_planes+i_planes, h_planes, 3, padding=1)
12
+
13
+ self.w = nn.Conv2d(h_planes, h_planes, 1, padding=0)
14
+
15
+ self.convz_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
16
+ self.convr_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
17
+ self.convq_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
18
+
19
+ def forward(self, net, *inputs):
20
+ inp = torch.cat(inputs, dim=1)
21
+ net_inp = torch.cat([net, inp], dim=1)
22
+
23
+ b, c, h, w = net.shape
24
+ glo = torch.sigmoid(self.w(net)) * net
25
+ glo = glo.view(b, c, h*w).mean(-1).view(b, c, 1, 1)
26
+
27
+ z = torch.sigmoid(self.convz(net_inp) + self.convz_glo(glo))
28
+ r = torch.sigmoid(self.convr(net_inp) + self.convr_glo(glo))
29
+ q = torch.tanh(self.convq(torch.cat([r*net, inp], dim=1)) + self.convq_glo(glo))
30
+
31
+ net = (1-z) * net + z * q
32
+ return net
33
+
34
+
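Note: besides the usual convolutional z/r/q gates, this GRU adds a global context term: the hidden state is gated by self.w, pooled to a [B, C, 1, 1] vector and fed through 1x1 convolutions into each gate. A shape-only sketch with hypothetical sizes; the extra inputs are concatenated along channels, so i_planes must match their total width.

import torch

gru = ConvGRU(h_planes=128, i_planes=128 + 64)
net = torch.randn(2, 128, 30, 40)            # hidden state
inp = torch.randn(2, 128, 30, 40)            # context features
corr = torch.randn(2, 64, 30, 40)            # correlation features (hypothetical width)
net = gru(net, inp, corr)
print(net.shape)                             # torch.Size([2, 128, 30, 40])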
thirdparty/DROID-SLAM/droid_slam/motion_filter.py ADDED
@@ -0,0 +1,92 @@
1
+ import cv2
2
+ import torch
3
+ import lietorch
4
+
5
+ from collections import OrderedDict
6
+ from droid_net import DroidNet
7
+
8
+ import geom.projective_ops as pops
9
+ from modules.corr import CorrBlock
10
+
11
+
12
+ class MotionFilter:
13
+ """ This class is used to filter incoming frames and extract features """
14
+
15
+ def __init__(self, net, video, thresh=2.5, device="cuda:0"):
16
+
17
+ # split net modules
18
+ self.cnet = net.cnet
19
+ self.fnet = net.fnet
20
+ self.update = net.update
21
+
22
+ self.video = video
23
+ self.thresh = thresh
24
+ self.device = device
25
+
26
+ self.count = 0
27
+
28
+ # mean, std for image normalization
29
+ self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
30
+ self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]
31
+
32
+ @torch.cuda.amp.autocast(enabled=True)
33
+ def __context_encoder(self, image):
34
+ """ context features """
35
+ net, inp = self.cnet(image).split([128,128], dim=2)
36
+ return net.tanh().squeeze(0), inp.relu().squeeze(0)
37
+
38
+ @torch.cuda.amp.autocast(enabled=True)
39
+ def __feature_encoder(self, image):
40
+ """ features for correlation volume """
41
+ return self.fnet(image).squeeze(0)
42
+
43
+ @torch.cuda.amp.autocast(enabled=True)
44
+ @torch.no_grad()
45
+ def track(self, tstamp, image, depth=None, intrinsics=None, mask=None):
46
+ """ main update operation - run on every frame in video """
47
+
48
+ Id = lietorch.SE3.Identity(1,).data.squeeze()
49
+ ht = image.shape[-2] // 8
50
+ wd = image.shape[-1] // 8
51
+
52
+ # normalize images
53
+ inputs = image[None, :, [2,1,0]].to(self.device) / 255.0
54
+ inputs = inputs.sub_(self.MEAN).div_(self.STDV)
55
+
56
+ # extract features
57
+ gmap = self.__feature_encoder(inputs) # [1, 128, gh, gw]
58
+ if mask is None:
59
+ mask = torch.zeros([gmap.shape[-2], gmap.shape[-1]]).to(gmap)
60
+ # if mask is not None:
61
+ # # bias = self.fnet.conv2.bias.detach().clone().half()
62
+ # # gmap[:,:,mask>0.0] = bias[:, None].repeat(1, (mask>0.0).sum())
63
+ # gmap[:,:,mask>0.0] = 0
64
+
65
+ ### always add first frame to the depth video ###
66
+ if self.video.counter.value == 0:
67
+ net, inp = self.__context_encoder(inputs[:,[0]])
68
+ self.net, self.inp, self.fmap = net, inp, gmap
69
+ self.video.append(tstamp, image[0], Id, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0], mask)
70
+ # mask: torch.Size([64, 48])
71
+ # gmap: torch.Size([1, 128, 64, 48])
72
+ # net: torch.Size([1, 128, 64, 48])
73
+ # inp: torch.Size([1, 128, 64, 48])
74
+
75
+ ### only add new frame if there is enough motion ###
76
+ else:
77
+ # index correlation volume
78
+ coords0 = pops.coords_grid(ht, wd, device=self.device)[None,None]
79
+ corr = CorrBlock(self.fmap[None,[0]], gmap[None,[0]])(coords0)
80
+
81
+ # approximate flow magnitude using 1 update iteration
82
+ _, delta, weight = self.update(self.net[None], self.inp[None], corr)
83
+
84
+ # check motion magnitude / add new frame to video
85
+ if delta.norm(dim=-1).mean().item() > self.thresh:
86
+ self.count = 0
87
+ net, inp = self.__context_encoder(inputs[:,[0]])
88
+ self.net, self.inp, self.fmap = net, inp, gmap
89
+ self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0], mask)
90
+
91
+ else:
92
+ self.count += 1
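Note: stripped of the feature extraction, the keyframe decision above reduces to comparing the mean magnitude of the predicted flow update against self.thresh. A stand-alone sketch of just that check:

import torch

def is_new_keyframe(delta, thresh=2.5):
    """delta: predicted flow update [..., H, W, 2]; keep the frame if mean flow magnitude exceeds thresh."""
    return delta.norm(dim=-1).mean().item() > thresh

print(is_new_keyframe(torch.full((1, 1, 30, 40, 2), 3.0)))   # True  -> becomes a keyframe
print(is_new_keyframe(torch.zeros(1, 1, 30, 40, 2)))         # False -> frame is skipped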
thirdparty/DROID-SLAM/droid_slam/trajectory_filler.py ADDED
@@ -0,0 +1,112 @@
1
+ import cv2
2
+ import torch
3
+ import lietorch
4
+
5
+ from lietorch import SE3
6
+ from collections import OrderedDict
7
+ from factor_graph import FactorGraph
8
+ from droid_net import DroidNet
9
+ import geom.projective_ops as pops
10
+
11
+
12
+ class PoseTrajectoryFiller:
13
+ """ This class is used to fill in non-keyframe poses """
14
+
15
+ def __init__(self, net, video, device="cuda:0"):
16
+
17
+ # split net modules
18
+ self.cnet = net.cnet
19
+ self.fnet = net.fnet
20
+ self.update = net.update
21
+
22
+ self.count = 0
23
+ self.video = video
24
+ self.device = device
25
+
26
+ # mean, std for image normalization
27
+ self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
28
+ self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]
29
+
30
+ @torch.cuda.amp.autocast(enabled=True)
31
+ def __feature_encoder(self, image):
32
+ """ features for correlation volume """
33
+ return self.fnet(image)
34
+
35
+ def __fill(self, tstamps, images, intrinsics):
36
+ """ fill operator """
37
+
38
+ tt = torch.as_tensor(tstamps, device="cuda")
39
+ images = torch.stack(images, 0)
40
+ intrinsics = torch.stack(intrinsics, 0)
41
+ inputs = images[:,:,[2,1,0]].to(self.device) / 255.0
42
+
43
+ ### linear pose interpolation ###
44
+ N = self.video.counter.value # number of keyframes
45
+ M = len(tstamps) # number of frames to fill in this chunk (at most 16)
46
+
47
+ ts = self.video.tstamp[:N] # tstamp of keyframes
48
+ Ps = SE3(self.video.poses[:N]) # pose of keyframes
49
+
50
+ t0 = torch.as_tensor([ts[ts<=t].shape[0] - 1 for t in tstamps])
51
+ t1 = torch.where(t0<N-1, t0+1, t0)
52
+
53
+ dt = ts[t1] - ts[t0] + 1e-3
54
+ dP = Ps[t1] * Ps[t0].inv()
55
+
56
+ v = dP.log() / dt.unsqueeze(-1)
57
+ w = v * (tt - ts[t0]).unsqueeze(-1)
58
+ Gs = SE3.exp(w) * Ps[t0]
59
+
60
+ # extract features (no need for context features)
61
+ inputs = inputs.sub_(self.MEAN).div_(self.STDV)
62
+ fmap = self.__feature_encoder(inputs)
63
+
64
+ self.video.counter.value += M
65
+ self.video[N:N+M] = (tt, images[:,0], Gs.data, 1, None, intrinsics / 8.0, fmap)
66
+ # print('t0:', t0, 't1:', t1)
67
+ # print('tt:', tt.shape, '\n', tt)
68
+
69
+ # self.video.append(tstamp, image[0], Id, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0], mask)
70
+ # self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0], mask)
71
+
72
+ graph = FactorGraph(self.video, self.update)
73
+ graph.add_factors(t0.cuda(), torch.arange(N, N+M).cuda())
74
+ graph.add_factors(t1.cuda(), torch.arange(N, N+M).cuda())
75
+ # print('graph.ii:', graph.ii)
76
+ # print('graph.jj:', graph.jj)
77
+ # print()
78
+
79
+ for itr in range(6):
80
+ graph.update(N, N+M, motion_only=True)
81
+
82
+ Gs = SE3(self.video.poses[N:N+M].clone())
83
+ self.video.counter.value -= M
84
+
85
+ return [ Gs ]
86
+
87
+ @torch.no_grad()
88
+ def __call__(self, image_stream):
89
+ """ fill in poses of non-keyframe images """
90
+
91
+ # store all camera poses
92
+ pose_list = []
93
+
94
+ tstamps = []
95
+ images = []
96
+ intrinsics = []
97
+
98
+ for (tstamp, image, intrinsic) in image_stream:
99
+ tstamps.append(tstamp)
100
+ images.append(image)
101
+ intrinsics.append(intrinsic)
102
+
103
+ if len(tstamps) == 16:
104
+ pose_list += self.__fill(tstamps, images, intrinsics)
105
+ tstamps, images, intrinsics = [], [], []
106
+
107
+ if len(tstamps) > 0:
108
+ pose_list += self.__fill(tstamps, images, intrinsics)
109
+
110
+ # stitch pose segments together
111
+ return lietorch.cat(pose_list, 0)
112
+
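Note: the "linear pose interpolation" above is constant-velocity interpolation in the SE3 tangent space (log, scale, exp). A minimal sketch with two hand-written keyframe poses, assuming the bundled lietorch extension is built; poses are 7-vectors (tx, ty, tz, qx, qy, qz, qw).

import torch
from lietorch import SE3

poses = SE3(torch.tensor([[0.0, 0, 0, 0, 0, 0, 1.0],     # keyframe at t=0
                          [1.0, 0, 0, 0, 0, 0, 1.0]]))   # keyframe at t=10, 1m along x
t0, t1, t = 0.0, 10.0, 2.5
i0, i1 = torch.tensor([0]), torch.tensor([1])

dP = poses[i1] * poses[i0].inv()                          # relative motion between keyframes
v = dP.log() / (t1 - t0)                                  # constant-velocity assumption
G = SE3.exp(v * (t - t0)) * poses[i0]                     # pose of the in-between frame
print(G.data)                                             # translation ~0.25m along x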
thirdparty/DROID-SLAM/droid_slam/vis_headless.py ADDED
@@ -0,0 +1,185 @@
1
+ import torch
2
+ import cv2
3
+ import lietorch
4
+ import droid_backends
5
+ import time
6
+ import argparse
7
+ import numpy as np
8
+
9
+ # import os
10
+ # os.environ['PYOPENGL_PLATFORM'] = 'egl'
11
+ #os.environ['PYOPENGL_PLATFORM'] = 'osmesa'
12
+ import open3d as o3d
13
+
14
+ # o3d.visualization.webrtc_server.enable_webrtc()
15
+
16
+ from lietorch import SE3
17
+ import geom.projective_ops as pops
18
+
19
+
20
+ CAM_POINTS = np.array([
21
+ [ 0, 0, 0],
22
+ [-1, -1, 1.5],
23
+ [ 1, -1, 1.5],
24
+ [ 1, 1, 1.5],
25
+ [-1, 1, 1.5],
26
+ [-0.5, 1, 1.5],
27
+ [ 0.5, 1, 1.5],
28
+ [ 0, 1.2, 1.5]])
29
+
30
+ CAM_LINES = np.array([
31
+ [1,2], [2,3], [3,4], [4,1], [1,0], [0,2], [3,0], [0,4], [5,7], [7,6]])
32
+
33
+ def white_balance(img):
34
+ # from https://stackoverflow.com/questions/46390779/automatic-white-balancing-with-grayworld-assumption
35
+ result = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
36
+ avg_a = np.average(result[:, :, 1])
37
+ avg_b = np.average(result[:, :, 2])
38
+ result[:, :, 1] = result[:, :, 1] - ((avg_a - 128) * (result[:, :, 0] / 255.0) * 1.1)
39
+ result[:, :, 2] = result[:, :, 2] - ((avg_b - 128) * (result[:, :, 0] / 255.0) * 1.1)
40
+ result = cv2.cvtColor(result, cv2.COLOR_LAB2BGR)
41
+ return result
42
+
43
+
44
+ def create_camera_actor(g, scale=0.05):
45
+ """ build open3d camera polydata """
46
+ camera_actor = o3d.geometry.LineSet(
47
+ points=o3d.utility.Vector3dVector(scale * CAM_POINTS),
48
+ lines=o3d.utility.Vector2iVector(CAM_LINES))
49
+
50
+ color = (g * 1.0, 0.5 * (1-g), 0.9 * (1-g))
51
+ camera_actor.paint_uniform_color(color)
52
+ return camera_actor
53
+
54
+
55
+ def create_point_actor(points, colors):
56
+ """ open3d point cloud from numpy array """
57
+ point_cloud = o3d.geometry.PointCloud()
58
+ point_cloud.points = o3d.utility.Vector3dVector(points)
59
+ point_cloud.colors = o3d.utility.Vector3dVector(colors)
60
+ return point_cloud
61
+
62
+
63
+ def droid_visualization(video, save_path, device="cuda:0"):
64
+ """ DROID visualization frontend """
65
+
66
+ torch.cuda.set_device(0)
67
+ droid_visualization.video = video
68
+ droid_visualization.cameras = {}
69
+ droid_visualization.points = {}
70
+ droid_visualization.warmup = 8
71
+ droid_visualization.scale = 1.0
72
+ droid_visualization.ix = 0
73
+ print("headless droid_visualization")
74
+
75
+
76
+ droid_visualization.filter_thresh = 0.3 #0.005
77
+
78
+ def increase_filter(vis):
79
+ droid_visualization.filter_thresh *= 2
80
+ with droid_visualization.video.get_lock():
81
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
82
+
83
+ def decrease_filter(vis):
84
+ droid_visualization.filter_thresh *= 0.5
85
+ with droid_visualization.video.get_lock():
86
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
87
+
88
+ def animation_callback(vis):
89
+ cam = vis.get_view_control().convert_to_pinhole_camera_parameters()
90
+
91
+ with torch.no_grad():
92
+
93
+ with video.get_lock():
94
+ t = video.counter.value
95
+ dirty_index, = torch.where(video.dirty.clone())
96
+ dirty_index = dirty_index
97
+
98
+ if len(dirty_index) == 0:
99
+ return
100
+
101
+ video.dirty[dirty_index] = False
102
+
103
+ # convert poses to 4x4 matrix
104
+ poses = torch.index_select(video.poses, 0, dirty_index)
105
+ disps = torch.index_select(video.disps, 0, dirty_index)
106
+ Ps = SE3(poses).inv().matrix().cpu().numpy()
107
+
108
+ images = torch.index_select(video.images, 0, dirty_index)
109
+ images = images.cpu()[:,[2,1,0],3::8,3::8].permute(0,2,3,1) / 255.0
110
+ points = droid_backends.iproj(SE3(poses).inv().data, disps, video.intrinsics[0]).cpu()
111
+
112
+ thresh = droid_visualization.filter_thresh * torch.ones_like(disps.mean(dim=[1,2]))
113
+
114
+ count = droid_backends.depth_filter(
115
+ video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)
116
+
117
+ count = count.cpu()
118
+ disps = disps.cpu()
119
+ masks = ((count >= 2) & (disps > .5*disps.mean(dim=[1,2], keepdim=True)))
120
+
121
+ for i in range(len(dirty_index)):
122
+ pose = Ps[i]
123
+ ix = dirty_index[i].item()
124
+
125
+ if ix in droid_visualization.cameras:
126
+ vis.remove_geometry(droid_visualization.cameras[ix])
127
+ del droid_visualization.cameras[ix]
128
+
129
+ if ix in droid_visualization.points:
130
+ vis.remove_geometry(droid_visualization.points[ix])
131
+ del droid_visualization.points[ix]
132
+
133
+ ### add camera actor ###
134
+ cam_actor = create_camera_actor(True)
135
+ cam_actor.transform(pose)
136
+ vis.add_geometry(cam_actor)
137
+ droid_visualization.cameras[ix] = cam_actor
138
+
139
+
140
+ mask = masks[i].reshape(-1)
141
+ pts = points[i].reshape(-1, 3)[mask].cpu().numpy()
142
+ clr = images[i].reshape(-1, 3)[mask].cpu().numpy()
143
+
144
+ ### add point actor ###
145
+ point_actor = create_point_actor(pts, clr)
146
+ vis.add_geometry(point_actor)
147
+ droid_visualization.points[ix] = point_actor
148
+
149
+ ### Hack to save Point Cloud Data and Camera results ###
150
+
151
+ # Save points
152
+ pcd_points = o3d.geometry.PointCloud()
153
+ for p in droid_visualization.points.items():
154
+ pcd_points += p[1]
155
+ o3d.io.write_point_cloud(f"{save_path}/points.ply", pcd_points, write_ascii=False)
156
+
157
+ # Save pose
158
+ pcd_camera = create_camera_actor(True)
159
+ for c in droid_visualization.cameras.items():
160
+ pcd_camera += c[1]
161
+
162
+ o3d.io.write_line_set(f"{save_path}/camera.ply", pcd_camera, write_ascii=False)
163
+
164
+ ### end ###
165
+
166
+ # hack to allow interacting with visualization during inference
167
+ if len(droid_visualization.cameras) >= droid_visualization.warmup:
168
+ cam = vis.get_view_control().convert_from_pinhole_camera_parameters(cam)
169
+
170
+ droid_visualization.ix += 1
171
+ vis.poll_events()
172
+ vis.update_renderer()
173
+
174
+ ### create Open3D visualization ###
175
+ vis = o3d.visualization.VisualizerWithKeyCallback()
176
+ vis.register_animation_callback(animation_callback)
177
+ vis.register_key_callback(ord("S"), increase_filter)
178
+ vis.register_key_callback(ord("A"), decrease_filter)
179
+
180
+ vis.create_window(height=540, width=960)
181
+ # vis.create_window(height=512, width=384)
182
+ vis.get_render_option().load_from_json("thirdparty/DROID-SLAM/misc/renderoption.json")
183
+
184
+ vis.run()
185
+ vis.destroy_window()
thirdparty/DROID-SLAM/droid_slam/visualization.py ADDED
@@ -0,0 +1,189 @@
1
+ import torch
2
+ import cv2
3
+ import lietorch
4
+ import droid_backends
5
+ import time
6
+ import argparse
7
+ import numpy as np
8
+ import open3d as o3d
9
+
10
+ from lietorch import SE3
11
+ import geom.projective_ops as pops
12
+
13
+ CAM_POINTS = np.array([
14
+ [ 0, 0, 0],
15
+ [-1, -1, 1.5],
16
+ [ 1, -1, 1.5],
17
+ [ 1, 1, 1.5],
18
+ [-1, 1, 1.5],
19
+ [-0.5, 1, 1.5],
20
+ [ 0.5, 1, 1.5],
21
+ [ 0, 1.2, 1.5]])
22
+
23
+ CAM_LINES = np.array([
24
+ [1,2], [2,3], [3,4], [4,1], [1,0], [0,2], [3,0], [0,4], [5,7], [7,6]])
25
+
26
+ def white_balance(img):
27
+ # from https://stackoverflow.com/questions/46390779/automatic-white-balancing-with-grayworld-assumption
28
+ result = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
29
+ avg_a = np.average(result[:, :, 1])
30
+ avg_b = np.average(result[:, :, 2])
31
+ result[:, :, 1] = result[:, :, 1] - ((avg_a - 128) * (result[:, :, 0] / 255.0) * 1.1)
32
+ result[:, :, 2] = result[:, :, 2] - ((avg_b - 128) * (result[:, :, 0] / 255.0) * 1.1)
33
+ result = cv2.cvtColor(result, cv2.COLOR_LAB2BGR)
34
+ return result
35
+
36
+ def create_camera_actor(g, scale=0.05):
37
+ """ build open3d camera polydata """
38
+ camera_actor = o3d.geometry.LineSet(
39
+ points=o3d.utility.Vector3dVector(scale * CAM_POINTS),
40
+ lines=o3d.utility.Vector2iVector(CAM_LINES))
41
+
42
+ color = (g * 1.0, 0.5 * (1-g), 0.9 * (1-g))
43
+ camera_actor.paint_uniform_color(color)
44
+ return camera_actor
45
+
46
+ def create_point_actor(points, colors):
47
+ """ open3d point cloud from numpy array """
48
+ point_cloud = o3d.geometry.PointCloud()
49
+ point_cloud.points = o3d.utility.Vector3dVector(points)
50
+ point_cloud.colors = o3d.utility.Vector3dVector(colors)
51
+ return point_cloud
52
+
53
+ def droid_visualization(video, device="cuda:0"):
54
+ """ DROID visualization frontend """
55
+
56
+ torch.cuda.set_device(device)
57
+ droid_visualization.video = video
58
+ droid_visualization.cameras = {}
59
+ droid_visualization.points = {}
60
+ droid_visualization.warmup = 8
61
+ droid_visualization.scale = 1.0
62
+ droid_visualization.ix = 0
63
+
64
+ droid_visualization.filter_thresh = 0.005
65
+
66
+ def increase_filter(vis):
67
+ droid_visualization.filter_thresh *= 2
68
+ with droid_visualization.video.get_lock():
69
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
70
+
71
+ def decrease_filter(vis):
72
+ droid_visualization.filter_thresh *= 0.5
73
+ with droid_visualization.video.get_lock():
74
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
75
+
76
+ # file-dialog-based point cloud export
77
+ def export_pointcloud(vis):
78
+ from open3d.visualization import gui  # missing import; kept local so the module still loads on headless installs
+ gui.Application.instance.initialize()
79
+ window = gui.Application.instance.create_window("Export", 350, 600)
80
+
81
+ def _on_filedlg_cancel():
82
+ window.close_dialog()
83
+ window.close()
84
+ gui.Application.instance.quit()
85
+
86
+ def _on_filedlg_done(path):
87
+ pcd_export(path)
88
+ window.close_dialog()
89
+ gui.Application.instance.quit()
90
+
91
+ def exec_file_dialog():
92
+ filedlg = gui.FileDialog(gui.FileDialog.SAVE, "Select file", window.theme)
93
+
94
+ filedlg.add_filter(".ply .xyz .pcd", "PointCloud (.xyz .ply .pcd)")
95
+ filedlg.add_filter("", "All files")
96
+ filedlg.set_on_cancel(_on_filedlg_cancel)
97
+ filedlg.set_on_done(_on_filedlg_done)
98
+ window.show_dialog(filedlg)
99
+
100
+ def pcd_export(path):
101
+ print("\nExporting pointcloud as", path)
102
+ final_pcd = o3d.geometry.PointCloud()
103
+ for p in droid_visualization.points.items():
104
+ final_pcd += p[1]
105
+
106
+ o3d.io.write_point_cloud(path, final_pcd, write_ascii=False)
107
+ #vis.capture_depth_point_cloud("/home/bertuser/droidslam_export.ply")
108
+
109
+ exec_file_dialog()
110
+
111
+ def animation_callback(vis):
112
+ cam = vis.get_view_control().convert_to_pinhole_camera_parameters()
113
+
114
+ with torch.no_grad():
115
+
116
+ with video.get_lock():
117
+ t = video.counter.value
118
+ dirty_index, = torch.where(video.dirty.clone())
119
+ dirty_index = dirty_index
120
+
121
+ if len(dirty_index) == 0:
122
+ return
123
+
124
+ video.dirty[dirty_index] = False
125
+
126
+ # convert poses to 4x4 matrix
127
+ poses = torch.index_select(video.poses, 0, dirty_index)
128
+ disps = torch.index_select(video.disps, 0, dirty_index)
129
+ Ps = SE3(poses).inv().matrix().cpu().numpy()
130
+
131
+ images = torch.index_select(video.images, 0, dirty_index)
132
+ images = images.cpu()[:,[2,1,0],3::8,3::8].permute(0,2,3,1) / 255.0
133
+ points = droid_backends.iproj(SE3(poses).inv().data, disps, video.intrinsics[0]).cpu()
134
+
135
+ thresh = droid_visualization.filter_thresh * torch.ones_like(disps.mean(dim=[1,2]))
136
+
137
+ count = droid_backends.depth_filter(
138
+ video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)
139
+
140
+ count = count.cpu()
141
+ disps = disps.cpu()
142
+ masks = ((count >= 2) & (disps > .5*disps.mean(dim=[1,2], keepdim=True)))
143
+
144
+ for i in range(len(dirty_index)):
145
+ pose = Ps[i]
146
+ ix = dirty_index[i].item()
147
+
148
+ if ix in droid_visualization.cameras:
149
+ vis.remove_geometry(droid_visualization.cameras[ix])
150
+ del droid_visualization.cameras[ix]
151
+
152
+ if ix in droid_visualization.points:
153
+ vis.remove_geometry(droid_visualization.points[ix])
154
+ del droid_visualization.points[ix]
155
+
156
+ ### add camera actor ###
157
+ cam_actor = create_camera_actor(True)
158
+ cam_actor.transform(pose)
159
+ vis.add_geometry(cam_actor)
160
+ droid_visualization.cameras[ix] = cam_actor
161
+
162
+ mask = masks[i].reshape(-1)
163
+ pts = points[i].reshape(-1, 3)[mask].cpu().numpy()
164
+ clr = images[i].reshape(-1, 3)[mask].cpu().numpy()
165
+
166
+ ### add point actor ###
167
+ point_actor = create_point_actor(pts, clr)
168
+ vis.add_geometry(point_actor)
169
+ droid_visualization.points[ix] = point_actor
170
+
171
+ # hack to allow interacting with visualization during inference
172
+ if len(droid_visualization.cameras) >= droid_visualization.warmup:
173
+ cam = vis.get_view_control().convert_from_pinhole_camera_parameters(cam)
174
+
175
+ droid_visualization.ix += 1
176
+ vis.poll_events()
177
+ vis.update_renderer()
178
+
179
+ ### create Open3D visualization ###
180
+ vis = o3d.visualization.VisualizerWithKeyCallback()
181
+ vis.register_animation_callback(animation_callback)
182
+ vis.register_key_callback(ord("S"), increase_filter)
183
+ vis.register_key_callback(ord("A"), decrease_filter)
184
+
185
+ vis.create_window(height=540, width=960)
186
+ vis.get_render_option().load_from_json("misc/renderoption.json")
187
+
188
+ vis.run()
189
+ vis.destroy_window()
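Note: points are only rendered when at least two keyframes agree on their depth (the count returned by droid_backends.depth_filter) and their disparity is not far below the frame mean. The masking rule itself is plain tensor logic; the numbers below are made up.

import torch

count = torch.tensor([[[3, 1], [2, 0]]])                  # hypothetical consistency counts
disps = torch.tensor([[[0.8, 0.9], [0.1, 0.7]]])          # hypothetical disparities
masks = (count >= 2) & (disps > 0.5 * disps.mean(dim=[1, 2], keepdim=True))
print(masks)   # tensor([[[ True, False], [False, False]]])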
thirdparty/DROID-SLAM/environment.yaml ADDED
@@ -0,0 +1,22 @@
1
+ name: droidenv
2
+ channels:
3
+ - rusty1s
4
+ - pytorch
5
+ - open3d-admin
6
+ - nvidia
7
+ - conda-forge
8
+ - defaults
9
+ dependencies:
10
+ - pytorch-scatter
11
+ - torchaudio
12
+ - torchvision
13
+ - open3d
14
+ - pytorch=1.10
15
+ - cudatoolkit=11.3
16
+ - tensorboard
17
+ - scipy
18
+ - opencv
19
+ - tqdm
20
+ - suitesparse
21
+ - matplotlib
22
+ - pyyaml
thirdparty/DROID-SLAM/environment_novis.yaml ADDED
@@ -0,0 +1,20 @@
1
+ name: droidenv
2
+ channels:
3
+ - rusty1s
4
+ - pytorch
5
+ - nvidia
6
+ - conda-forge
7
+ - defaults
8
+ dependencies:
9
+ - pytorch-scatter
10
+ - torchaudio
11
+ - torchvision
12
+ - pytorch=1.10
13
+ - cudatoolkit=11.3
14
+ - tensorboard
15
+ - scipy
16
+ - opencv
17
+ - tqdm
18
+ - suitesparse
19
+ - matplotlib
20
+ - pyyaml
thirdparty/DROID-SLAM/evaluation_scripts/test_eth3d.py ADDED
@@ -0,0 +1,134 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ import torch.nn.functional as F
15
+ from droid import Droid
16
+
17
+ import matplotlib.pyplot as plt
18
+
19
+
20
+ def show_image(image):
21
+ image = image.permute(1, 2, 0).cpu().numpy()
22
+ cv2.imshow('image', image / 255.0)
23
+ cv2.waitKey(1)
24
+
25
+ def image_stream(datapath, use_depth=False, stride=1):
26
+ """ image generator """
27
+
28
+ fx, fy, cx, cy = np.loadtxt(os.path.join(datapath, 'calibration.txt')).tolist()
29
+ image_list = sorted(glob.glob(os.path.join(datapath, 'rgb', '*.png')))[::stride]
30
+ depth_list = sorted(glob.glob(os.path.join(datapath, 'depth', '*.png')))[::stride]
31
+
32
+ for t, (image_file, depth_file) in enumerate(zip(image_list, depth_list)):
33
+ image = cv2.imread(image_file)
34
+ depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH) / 5000.0
35
+
36
+ h0, w0, _ = image.shape
37
+ h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
38
+ w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
39
+
40
+ image = cv2.resize(image, (w1, h1))
41
+ image = image[:h1-h1%8, :w1-w1%8]
42
+ image = torch.as_tensor(image).permute(2, 0, 1)
43
+
44
+ depth = torch.as_tensor(depth)
45
+ depth = F.interpolate(depth[None,None], (h1, w1)).squeeze()
46
+ depth = depth[:h1-h1%8, :w1-w1%8]
47
+
48
+ intrinsics = torch.as_tensor([fx, fy, cx, cy])
49
+ intrinsics[0::2] *= (w1 / w0)
50
+ intrinsics[1::2] *= (h1 / h0)
51
+
52
+ if use_depth:
53
+ yield t, image[None], depth, intrinsics
54
+
55
+ else:
56
+ yield t, image[None], intrinsics
57
+
58
+ if __name__ == '__main__':
59
+ parser = argparse.ArgumentParser()
60
+ parser.add_argument("--datapath")
61
+ parser.add_argument("--weights", default="droid.pth")
62
+ parser.add_argument("--buffer", type=int, default=1024)
63
+ parser.add_argument("--image_size", default=[240, 320])
64
+ parser.add_argument("--disable_vis", action="store_true")
65
+
66
+ parser.add_argument("--beta", type=float, default=0.5)
67
+ parser.add_argument("--filter_thresh", type=float, default=2.0)
68
+ parser.add_argument("--warmup", type=int, default=8)
69
+ parser.add_argument("--keyframe_thresh", type=float, default=3.5)
70
+ parser.add_argument("--frontend_thresh", type=float, default=16.0)
71
+ parser.add_argument("--frontend_window", type=int, default=16)
72
+ parser.add_argument("--frontend_radius", type=int, default=1)
73
+ parser.add_argument("--frontend_nms", type=int, default=0)
74
+
75
+ parser.add_argument("--stereo", action="store_true")
76
+ parser.add_argument("--depth", action="store_true")
77
+
78
+ parser.add_argument("--backend_thresh", type=float, default=22.0)
79
+ parser.add_argument("--backend_radius", type=int, default=2)
80
+ parser.add_argument("--backend_nms", type=int, default=3)
81
+ args = parser.parse_args()
82
+
83
+ torch.multiprocessing.set_start_method('spawn')
84
+
85
+ print("Running evaluation on {}".format(args.datapath))
86
+ print(args)
87
+
88
+ # this can usually be set to 2-3 except for "camera_shake" scenes
89
+ # set to 2 for test scenes
90
+ stride = 1
91
+
92
+ tstamps = []
93
+ for (t, image, depth, intrinsics) in tqdm(image_stream(args.datapath, use_depth=True, stride=stride)):
94
+ if not args.disable_vis:
95
+ show_image(image[0])
96
+
97
+ if t == 0:
98
+ args.image_size = [image.shape[2], image.shape[3]]
99
+ droid = Droid(args)
100
+
101
+ droid.track(t, image, depth, intrinsics=intrinsics)
102
+
103
+ traj_est = droid.terminate(image_stream(args.datapath, use_depth=False, stride=stride))
104
+
105
+ ### run evaluation ###
106
+
107
+ print("#"*20 + " Results...")
108
+
109
+ import evo
110
+ from evo.core.trajectory import PoseTrajectory3D
111
+ from evo.tools import file_interface
112
+ from evo.core import sync
113
+ import evo.main_ape as main_ape
114
+ from evo.core.metrics import PoseRelation
115
+
116
+ image_path = os.path.join(args.datapath, 'rgb')
117
+ images_list = sorted(glob.glob(os.path.join(image_path, '*.png')))[::stride]
118
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
119
+
120
+ traj_est = PoseTrajectory3D(
121
+ positions_xyz=traj_est[:,:3],
122
+ orientations_quat_wxyz=traj_est[:,3:],
123
+ timestamps=np.array(tstamps))
124
+
125
+ gt_file = os.path.join(args.datapath, 'groundtruth.txt')
126
+ traj_ref = file_interface.read_tum_trajectory_file(gt_file)
127
+
128
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
129
+
130
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
131
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=False)
132
+
133
+ print(result.stats)
134
+
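Note: image_stream above resizes each frame so its area is roughly 384x512, crops to a multiple of 8 (the network works on 1/8-resolution grids) and scales the intrinsics by the same resize factors. The arithmetic in isolation, with an illustrative 480x640 input:

import numpy as np

def resized_shape_and_scale(h0, w0, target=384 * 512):
    s = np.sqrt(target / (h0 * w0))
    h1, w1 = int(h0 * s), int(w0 * s)
    scale = (w1 / w0, h1 / h0)                 # applied to (fx, cx) and (fy, cy) respectively
    return (h1 - h1 % 8, w1 - w1 % 8), scale   # cropped shape, intrinsics scale

print(resized_shape_and_scale(480, 640))       # ((384, 512), (0.8, 0.8))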
thirdparty/DROID-SLAM/evaluation_scripts/test_euroc.py ADDED
@@ -0,0 +1,142 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ from torch.multiprocessing import Process
15
+ from droid import Droid
16
+
17
+ import torch.nn.functional as F
18
+
19
+
20
+
21
+ def show_image(image):
22
+ image = image.permute(1, 2, 0).cpu().numpy()
23
+ cv2.imshow('image', image / 255.0)
24
+ cv2.waitKey(1)
25
+
26
+ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1):
27
+ """ image generator """
28
+
29
+ K_l = np.array([458.654, 0.0, 367.215, 0.0, 457.296, 248.375, 0.0, 0.0, 1.0]).reshape(3,3)
30
+ d_l = np.array([-0.28340811, 0.07395907, 0.00019359, 1.76187114e-05, 0.0])
31
+ R_l = np.array([
32
+ 0.999966347530033, -0.001422739138722922, 0.008079580483432283,
33
+ 0.001365741834644127, 0.9999741760894847, 0.007055629199258132,
34
+ -0.008089410156878961, -0.007044357138835809, 0.9999424675829176
35
+ ]).reshape(3,3)
36
+
37
+ P_l = np.array([435.2046959714599, 0, 367.4517211914062, 0, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4)
38
+ map_l = cv2.initUndistortRectifyMap(K_l, d_l, R_l, P_l[:3,:3], (752, 480), cv2.CV_32F)
39
+
40
+ K_r = np.array([457.587, 0.0, 379.999, 0.0, 456.134, 255.238, 0.0, 0.0, 1]).reshape(3,3)
41
+ d_r = np.array([-0.28368365, 0.07451284, -0.00010473, -3.555907e-05, 0.0]).reshape(5)
42
+ R_r = np.array([
43
+ 0.9999633526194376, -0.003625811871560086, 0.007755443660172947,
44
+ 0.003680398547259526, 0.9999684752771629, -0.007035845251224894,
45
+ -0.007729688520722713, 0.007064130529506649, 0.999945173484644
46
+ ]).reshape(3,3)
47
+
48
+ P_r = np.array([435.2046959714599, 0, 367.4517211914062, -47.90639384423901, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4)
49
+ map_r = cv2.initUndistortRectifyMap(K_r, d_r, R_r, P_r[:3,:3], (752, 480), cv2.CV_32F)
50
+
51
+ intrinsics_vec = [435.2046959714599, 435.2046959714599, 367.4517211914062, 252.2008514404297]
52
+ ht0, wd0 = [480, 752]
53
+
54
+ # read all png images in folder
55
+ images_left = sorted(glob.glob(os.path.join(datapath, 'mav0/cam0/data/*.png')))[::stride]
56
+ images_right = [x.replace('cam0', 'cam1') for x in images_left]
57
+
58
+ for t, (imgL, imgR) in enumerate(zip(images_left, images_right)):
59
+ if stereo and not os.path.isfile(imgR):
60
+ continue
61
+ tstamp = float(imgL.split('/')[-1][:-4])
62
+ images = [cv2.remap(cv2.imread(imgL), map_l[0], map_l[1], interpolation=cv2.INTER_LINEAR)]
63
+ if stereo:
64
+ images += [cv2.remap(cv2.imread(imgR), map_r[0], map_r[1], interpolation=cv2.INTER_LINEAR)]
65
+
66
+ images = torch.from_numpy(np.stack(images, 0))
67
+ images = images.permute(0, 3, 1, 2).to("cuda:0", dtype=torch.float32)
68
+ images = F.interpolate(images, image_size, mode="bilinear", align_corners=False)
69
+
70
+ intrinsics = torch.as_tensor(intrinsics_vec).cuda()
71
+ intrinsics[0] *= image_size[1] / wd0
72
+ intrinsics[1] *= image_size[0] / ht0
73
+ intrinsics[2] *= image_size[1] / wd0
74
+ intrinsics[3] *= image_size[0] / ht0
75
+
76
+ yield stride*t, images, intrinsics
77
+
78
+
79
+ if __name__ == '__main__':
80
+ parser = argparse.ArgumentParser()
81
+ parser.add_argument("--datapath", help="path to euroc sequence")
82
+ parser.add_argument("--gt", help="path to gt file")
83
+ parser.add_argument("--weights", default="droid.pth")
84
+ parser.add_argument("--buffer", type=int, default=512)
85
+ parser.add_argument("--image_size", default=[320,512])
86
+ parser.add_argument("--disable_vis", action="store_true")
87
+ parser.add_argument("--stereo", action="store_true")
88
+
89
+ parser.add_argument("--beta", type=float, default=0.3)
90
+ parser.add_argument("--filter_thresh", type=float, default=2.4)
91
+ parser.add_argument("--warmup", type=int, default=15)
92
+ parser.add_argument("--keyframe_thresh", type=float, default=3.5)
93
+ parser.add_argument("--frontend_thresh", type=float, default=17.5)
94
+ parser.add_argument("--frontend_window", type=int, default=20)
95
+ parser.add_argument("--frontend_radius", type=int, default=2)
96
+ parser.add_argument("--frontend_nms", type=int, default=1)
97
+
98
+ parser.add_argument("--backend_thresh", type=float, default=24.0)
99
+ parser.add_argument("--backend_radius", type=int, default=2)
100
+ parser.add_argument("--backend_nms", type=int, default=2)
101
+ args = parser.parse_args()
102
+
103
+ torch.multiprocessing.set_start_method('spawn')
104
+
105
+ print("Running evaluation on {}".format(args.datapath))
106
+ print(args)
107
+
108
+ droid = Droid(args)
109
+ time.sleep(5)
110
+
111
+ for (t, image, intrinsics) in tqdm(image_stream(args.datapath, stereo=args.stereo, stride=2)):
112
+ droid.track(t, image, intrinsics=intrinsics)
113
+
114
+ traj_est = droid.terminate(image_stream(args.datapath, stride=1))
115
+
116
+ ### run evaluation ###
117
+
118
+ import evo
119
+ from evo.core.trajectory import PoseTrajectory3D
120
+ from evo.tools import file_interface
121
+ from evo.core import sync
122
+ import evo.main_ape as main_ape
123
+ from evo.core.metrics import PoseRelation
124
+
125
+ images_list = sorted(glob.glob(os.path.join(args.datapath, 'mav0/cam0/data/*.png')))
126
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
127
+
128
+ traj_est = PoseTrajectory3D(
129
+ positions_xyz=1.10 * traj_est[:,:3],
130
+ orientations_quat_wxyz=traj_est[:,3:],
131
+ timestamps=np.array(tstamps))
132
+
133
+ traj_ref = file_interface.read_tum_trajectory_file(args.gt)
134
+
135
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
136
+
137
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
138
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)
139
+
140
+ print(result)
141
+
142
+
thirdparty/DROID-SLAM/evaluation_scripts/test_tum.py ADDED
@@ -0,0 +1,123 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ import torch.nn.functional as F
15
+ from droid import Droid
16
+
17
+
18
+ def show_image(image):
19
+ image = image.permute(1, 2, 0).cpu().numpy()
20
+ cv2.imshow('image', image / 255.0)
21
+ cv2.waitKey(1)
22
+
23
+ def image_stream(datapath, image_size=[320, 512]):
24
+ """ image generator """
25
+
26
+ fx, fy, cx, cy = 517.3, 516.5, 318.6, 255.3
27
+
28
+ K_l = np.array([fx, 0.0, cx, 0.0, fy, cy, 0.0, 0.0, 1.0]).reshape(3,3)
29
+ d_l = np.array([0.2624, -0.9531, -0.0054, 0.0026, 1.1633])
30
+
31
+ # read all png images in folder
32
+ images_list = sorted(glob.glob(os.path.join(datapath, 'rgb', '*.png')))[::2]
33
+
34
+ for t, imfile in enumerate(images_list):
35
+ image = cv2.imread(imfile)
36
+ ht0, wd0, _ = image.shape
37
+ image = cv2.undistort(image, K_l, d_l)
38
+ image = cv2.resize(image, (320+32, 240+16))
39
+ image = torch.from_numpy(image).permute(2,0,1)
40
+
41
+ intrinsics = torch.as_tensor([fx, fy, cx, cy]).cuda()
42
+ intrinsics[0] *= image.shape[2] / 640.0
43
+ intrinsics[1] *= image.shape[1] / 480.0
44
+ intrinsics[2] *= image.shape[2] / 640.0
45
+ intrinsics[3] *= image.shape[1] / 480.0
46
+
47
+ # crop image to remove distortion boundary
48
+ intrinsics[2] -= 16
49
+ intrinsics[3] -= 8
50
+ image = image[:, 8:-8, 16:-16]
51
+
52
+ yield t, image[None], intrinsics
53
+
54
+ if __name__ == '__main__':
55
+ parser = argparse.ArgumentParser()
56
+ parser.add_argument("--datapath")
57
+ parser.add_argument("--weights", default="droid.pth")
58
+ parser.add_argument("--buffer", type=int, default=512)
59
+ parser.add_argument("--image_size", default=[240, 320])
60
+ parser.add_argument("--disable_vis", action="store_true")
61
+
62
+ parser.add_argument("--beta", type=float, default=0.6)
63
+ parser.add_argument("--filter_thresh", type=float, default=1.75)
64
+ parser.add_argument("--warmup", type=int, default=12)
65
+ parser.add_argument("--keyframe_thresh", type=float, default=2.25)
66
+ parser.add_argument("--frontend_thresh", type=float, default=12.0)
67
+ parser.add_argument("--frontend_window", type=int, default=25)
68
+ parser.add_argument("--frontend_radius", type=int, default=2)
69
+ parser.add_argument("--frontend_nms", type=int, default=1)
70
+
71
+ parser.add_argument("--backend_thresh", type=float, default=15.0)
72
+ parser.add_argument("--backend_radius", type=int, default=2)
73
+ parser.add_argument("--backend_nms", type=int, default=3)
74
+ args = parser.parse_args()
75
+
76
+ args.stereo = False
77
+ torch.multiprocessing.set_start_method('spawn')
78
+
79
+ print("Running evaluation on {}".format(args.datapath))
80
+ print(args)
81
+
82
+ droid = Droid(args)
83
+ time.sleep(5)
84
+
85
+ tstamps = []
86
+ for (t, image, intrinsics) in tqdm(image_stream(args.datapath)):
87
+ if not args.disable_vis:
88
+ show_image(image)
89
+ droid.track(t, image, intrinsics=intrinsics)
90
+
91
+
92
+ traj_est = droid.terminate(image_stream(args.datapath))
93
+
94
+ ### run evaluation ###
95
+
96
+ print("#"*20 + " Results...")
97
+
98
+ import evo
99
+ from evo.core.trajectory import PoseTrajectory3D
100
+ from evo.tools import file_interface
101
+ from evo.core import sync
102
+ import evo.main_ape as main_ape
103
+ from evo.core.metrics import PoseRelation
104
+
105
+ image_path = os.path.join(args.datapath, 'rgb')
106
+ images_list = sorted(glob.glob(os.path.join(image_path, '*.png')))[::2]
107
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
108
+
109
+ traj_est = PoseTrajectory3D(
110
+ positions_xyz=traj_est[:,:3],
111
+ orientations_quat_wxyz=traj_est[:,3:],
112
+ timestamps=np.array(tstamps))
113
+
114
+ gt_file = os.path.join(args.datapath, 'groundtruth.txt')
115
+ traj_ref = file_interface.read_tum_trajectory_file(gt_file)
116
+
117
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
118
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
119
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)
120
+
121
+
122
+ print(result)
123
+
thirdparty/DROID-SLAM/evaluation_scripts/validate_tartanair.py ADDED
@@ -0,0 +1,115 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+ sys.path.append('thirdparty/tartanair_tools')
4
+
5
+ from tqdm import tqdm
6
+ import numpy as np
7
+ import torch
8
+ import lietorch
9
+ import cv2
10
+ import os
11
+ import glob
12
+ import time
13
+ import yaml
14
+ import argparse
15
+
16
+ from droid import Droid
17
+
18
+ def image_stream(datapath, image_size=[384, 512], intrinsics_vec=[320.0, 320.0, 320.0, 240.0], stereo=False):
19
+ """ image generator """
20
+
21
+ # read all png images in folder
22
+ ht0, wd0 = [480, 640]
23
+ images_left = sorted(glob.glob(os.path.join(datapath, 'image_left/*.png')))
24
+ images_right = sorted(glob.glob(os.path.join(datapath, 'image_right/*.png')))
25
+
26
+ data = []
27
+ for t in range(len(images_left)):
28
+ images = [ cv2.resize(cv2.imread(images_left[t]), (image_size[1], image_size[0])) ]
29
+ if stereo:
30
+ images += [ cv2.resize(cv2.imread(images_right[t]), (image_size[1], image_size[0])) ]
31
+
32
+ images = torch.from_numpy(np.stack(images, 0)).permute(0,3,1,2)
33
+ intrinsics = .8 * torch.as_tensor(intrinsics_vec)
34
+
35
+ data.append((t, images, intrinsics))
36
+
37
+ return data
38
+
39
+
40
+ if __name__ == '__main__':
41
+ parser = argparse.ArgumentParser()
42
+ parser.add_argument("--datapath", default="datasets/TartanAir")
43
+ parser.add_argument("--weights", default="droid.pth")
44
+ parser.add_argument("--buffer", type=int, default=1000)
45
+ parser.add_argument("--image_size", default=[384,512])
46
+ parser.add_argument("--stereo", action="store_true")
47
+ parser.add_argument("--disable_vis", action="store_true")
48
+ parser.add_argument("--plot_curve", action="store_true")
49
+ parser.add_argument("--id", type=int, default=-1)
50
+
51
+ parser.add_argument("--beta", type=float, default=0.3)
52
+ parser.add_argument("--filter_thresh", type=float, default=2.4)
53
+ parser.add_argument("--warmup", type=int, default=12)
54
+ parser.add_argument("--keyframe_thresh", type=float, default=3.5)
55
+ parser.add_argument("--frontend_thresh", type=float, default=15)
56
+ parser.add_argument("--frontend_window", type=int, default=20)
57
+ parser.add_argument("--frontend_radius", type=int, default=1)
58
+ parser.add_argument("--frontend_nms", type=int, default=1)
59
+
60
+ parser.add_argument("--backend_thresh", type=float, default=20.0)
61
+ parser.add_argument("--backend_radius", type=int, default=2)
62
+ parser.add_argument("--backend_nms", type=int, default=3)
63
+
64
+ args = parser.parse_args()
65
+ torch.multiprocessing.set_start_method('spawn')
66
+
67
+ from data_readers.tartan import test_split
68
+ from evaluation.tartanair_evaluator import TartanAirEvaluator
69
+
70
+ if not os.path.isdir("figures"):
71
+ os.mkdir("figures")
72
+
73
+ if args.id >= 0:
74
+ test_split = [ test_split[args.id] ]
75
+
76
+ ate_list = []
77
+ for scene in test_split:
78
+ print("Performing evaluation on {}".format(scene))
79
+ torch.cuda.empty_cache()
80
+ droid = Droid(args)
81
+
82
+ scenedir = os.path.join(args.datapath, scene)
83
+
84
+ for (tstamp, image, intrinsics) in tqdm(image_stream(scenedir, stereo=args.stereo)):
85
+ droid.track(tstamp, image, intrinsics=intrinsics)
86
+
87
+ # fill in non-keyframe poses + global BA
88
+ traj_est = droid.terminate(image_stream(scenedir))
89
+
90
+ ### do evaluation ###
91
+ evaluator = TartanAirEvaluator()
92
+ gt_file = os.path.join(scenedir, "pose_left.txt")
93
+ traj_ref = np.loadtxt(gt_file, delimiter=' ')[:, [1, 2, 0, 4, 5, 3, 6]] # ned -> xyz
94
+
95
+ # usually stereo should not be scale corrected, but we are comparing monocular and stereo here
96
+ results = evaluator.evaluate_one_trajectory(
97
+ traj_ref, traj_est, scale=True, title=scenedir[-20:].replace('/', '_'))
98
+
99
+ print(results)
100
+ ate_list.append(results["ate_score"])
101
+
102
+ print("Results")
103
+ print(ate_list)
104
+
105
+ if args.plot_curve:
106
+ import matplotlib.pyplot as plt
107
+ ate = np.array(ate_list)
108
+ xs = np.linspace(0.0, 1.0, 512)
109
+ ys = [np.count_nonzero(ate < t) / ate.shape[0] for t in xs]
110
+
111
+ plt.plot(xs, ys)
112
+ plt.xlabel("ATE [m]")
113
+ plt.ylabel("% runs")
114
+ plt.show()
115
+
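Note: the --plot_curve branch plots, for each error threshold, the fraction of runs whose ATE falls below it. The same statistic without matplotlib, using made-up ATE scores:

import numpy as np

ate = np.array([0.08, 0.15, 0.32, 0.05])               # hypothetical per-scene ATE scores [m]
thresholds = np.linspace(0.0, 1.0, 512)
success = [np.count_nonzero(ate < t) / ate.shape[0] for t in thresholds]
print(success[0], success[-1])                         # 0.0 at t=0.0, 1.0 once t exceeds every score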
thirdparty/DROID-SLAM/misc/DROID.png ADDED

Git LFS Details

  • SHA256: 99fb33606ad6ea92b4512bd843c65b5db654734d570cc5787df467df4c9d8faf
  • Pointer size: 131 Bytes
  • Size of remote file: 745 kB
thirdparty/DROID-SLAM/misc/renderoption.json ADDED
@@ -0,0 +1,40 @@
+{
+    "background_color" : [ 1, 1, 1 ],
+    "class_name" : "RenderOption",
+    "default_mesh_color" : [ 0.69999999999999996, 0.69999999999999996, 0.69999999999999996 ],
+    "image_max_depth" : 3000,
+    "image_stretch_option" : 0,
+    "interpolation_option" : 0,
+    "light0_color" : [ 1, 1, 1 ],
+    "light0_diffuse_power" : 20,
+    "light0_position" : [ 0, 0, 20 ],
+    "light0_specular_power" : 2.20000000000000001,
+    "light0_specular_shininess" : 100,
+    "light1_color" : [ 1, 1, 1 ],
+    "light1_diffuse_power" : 0.66000000000000003,
+    "light1_position" : [ 0, 0, 2 ],
+    "light1_specular_power" : 2.20000000000000001,
+    "light1_specular_shininess" : 100,
+    "light2_color" : [ 1, 1, 1 ],
+    "light2_diffuse_power" : 20,
+    "light2_position" : [ 0, 0, -20 ],
+    "light2_specular_power" : 2.20000000000000001,
+    "light2_specular_shininess" : 100,
+    "light3_color" : [ 1, 1, 1 ],
+    "light3_diffuse_power" : 20,
+    "light3_position" : [ 0, 0, -20 ],
+    "light3_specular_power" : 2.20000000000000001,
+    "light3_specular_shininess" : 100,
+    "light_ambient_color" : [ 0, 0, 0 ],
+    "light_on" : true,
+    "mesh_color_option" : 1,
+    "mesh_shade_option" : 0,
+    "mesh_show_back_face" : false,
+    "mesh_show_wireframe" : false,
+    "point_color_option" : 7,
+    "point_show_normal" : false,
+    "point_size" : 2,
+    "show_coordinate_frame" : false,
+    "version_major" : 1,
+    "version_minor" : 0
+}
thirdparty/DROID-SLAM/misc/screenshot.png ADDED

Git LFS Details

  • SHA256: b8bb7761f678a743bf6a5b8af137c9d624e44a7d0f1111acf602823278a0529a
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
thirdparty/DROID-SLAM/setup.py ADDED
@@ -0,0 +1,61 @@
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+import os.path as osp
+ROOT = osp.dirname(osp.abspath(__file__))
+
+setup(
+    name='droid_backends',
+    ext_modules=[
+        CUDAExtension('droid_backends',
+            include_dirs=[osp.join(ROOT, 'thirdparty/eigen')],
+            sources=[
+                'src/droid.cpp',
+                'src/droid_kernels.cu',
+                'src/correlation_kernels.cu',
+                'src/altcorr_kernel.cu',
+            ],
+            extra_compile_args={
+                'cxx': ['-O3'],
+                'nvcc': ['-O3',
+                    '-gencode=arch=compute_60,code=sm_60',
+                    '-gencode=arch=compute_61,code=sm_61',
+                    '-gencode=arch=compute_70,code=sm_70',
+                    '-gencode=arch=compute_75,code=sm_75',
+                    '-gencode=arch=compute_80,code=sm_80',
+                    '-gencode=arch=compute_86,code=sm_86',
+                ]
+            }),
+    ],
+    cmdclass={ 'build_ext' : BuildExtension }
+)
+
+setup(
+    name='lietorch',
+    version='0.2',
+    description='Lie Groups for PyTorch',
+    packages=['lietorch'],
+    package_dir={'': 'thirdparty/lietorch'},
+    ext_modules=[
+        CUDAExtension('lietorch_backends',
+            include_dirs=[
+                osp.join(ROOT, 'thirdparty/lietorch/lietorch/include'),
+                osp.join(ROOT, 'thirdparty/eigen')],
+            sources=[
+                'thirdparty/lietorch/lietorch/src/lietorch.cpp',
+                'thirdparty/lietorch/lietorch/src/lietorch_gpu.cu',
+                'thirdparty/lietorch/lietorch/src/lietorch_cpu.cpp'],
+            extra_compile_args={
+                'cxx': ['-O2'],
+                'nvcc': ['-O2',
+                    '-gencode=arch=compute_60,code=sm_60',
+                    '-gencode=arch=compute_61,code=sm_61',
+                    '-gencode=arch=compute_70,code=sm_70',
+                    '-gencode=arch=compute_75,code=sm_75',
+                    '-gencode=arch=compute_80,code=sm_80',
+                    '-gencode=arch=compute_86,code=sm_86',
+                ]
+            }),
+    ],
+    cmdclass={ 'build_ext' : BuildExtension }
+)
thirdparty/DROID-SLAM/src/altcorr_kernel.cu ADDED
@@ -0,0 +1,356 @@
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <vector>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+
+
+#include <ATen/ATen.h>
+#include <ATen/NativeFunctions.h>
+#include <ATen/cuda/CUDAApplyUtils.cuh>
+#include <ATen/native/cuda/KernelUtils.cuh>
+
+
+
+#define BLOCK_H 4
+#define BLOCK_W 8
+#define BLOCK_HW BLOCK_H * BLOCK_W
+#define CHANNEL_STRIDE 32
+
+
+__forceinline__ __device__
+bool within_bounds(int h, int w, int H, int W) {
+  return h >= 0 && h < H && w >= 0 && w < W;
+}
+
+template <typename scalar_t>
+__global__ void altcorr_forward_kernel(
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1,
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2,
+  const torch::PackedTensorAccessor32<float,5,torch::RestrictPtrTraits> coords,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr,
+  int r)
+{
+  const int b = blockIdx.x;
+  const int h0 = blockIdx.y * blockDim.x;
+  const int w0 = blockIdx.z * blockDim.y;
+  const int tid = threadIdx.x * blockDim.y + threadIdx.y;
+
+  const int H1 = fmap1.size(1);
+  const int W1 = fmap1.size(2);
+  const int H2 = fmap2.size(1);
+  const int W2 = fmap2.size(2);
+  const int N = coords.size(1);
+  const int C = fmap1.size(3);
+
+  __shared__ scalar_t f1[CHANNEL_STRIDE][BLOCK_HW];
+  __shared__ scalar_t f2[CHANNEL_STRIDE][BLOCK_HW];
+
+  __shared__ float x2s[BLOCK_HW];
+  __shared__ float y2s[BLOCK_HW];
+
+  for (int c=0; c<C; c+=CHANNEL_STRIDE) {
+    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+      int k1 = k + tid / CHANNEL_STRIDE;
+      int h1 = h0 + k1 / BLOCK_W;
+      int w1 = w0 + k1 % BLOCK_W;
+      int c1 = tid % CHANNEL_STRIDE;
+
+      if (within_bounds(h1, w1, H1, W1))
+        f1[c1][k1] = fmap1[b][h1][w1][c+c1];
+
+      else
+        f1[c1][k1] = 0.0;
+    }
+
+    __syncthreads();
+
+    for (int n=0; n<N; n++) {
+      int h1 = h0 + threadIdx.x;
+      int w1 = w0 + threadIdx.y;
+      if (within_bounds(h1, w1, H1, W1)) {
+        x2s[tid] = coords[b][n][h1][w1][0];
+        y2s[tid] = coords[b][n][h1][w1][1];
+      }
+
+      float dx = x2s[tid] - floor(x2s[tid]);
+      float dy = y2s[tid] - floor(y2s[tid]);
+
+      int rd = 2*r + 1;
+      for (int iy=0; iy<rd+1; iy++) {
+        for (int ix=0; ix<rd+1; ix++) {
+          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+            int k1 = k + tid / CHANNEL_STRIDE;
+            int h2 = static_cast<int>(floor(y2s[k1])) - r + iy;
+            int w2 = static_cast<int>(floor(x2s[k1])) - r + ix;
+            int c2 = tid % CHANNEL_STRIDE;
+
+            if (within_bounds(h2, w2, H2, W2))
+              f2[c2][k1] = fmap2[b][h2][w2][c+c2];
+
+            else
+              f2[c2][k1] = static_cast<scalar_t>(0.0);
+          }
+
+          __syncthreads();
+
+          scalar_t s = 0.0;
+          for (int k=0; k<CHANNEL_STRIDE; k++)
+            s += f1[k][tid] * f2[k][tid];
+
+          int ix_nw = H1*W1*((iy-1) + rd*(ix-1));
+          int ix_ne = H1*W1*((iy-1) + rd*ix);
+          int ix_sw = H1*W1*(iy + rd*(ix-1));
+          int ix_se = H1*W1*(iy + rd*ix);
+
+          // int ix_nw = ((iy-1) + rd*(ix-1));
+          // int ix_ne = ((iy-1) + rd*ix);
+          // int ix_sw = (iy + rd*(ix-1));
+          // int ix_se = (iy + rd*ix);
+
+          scalar_t nw = s * static_cast<scalar_t>((dy) * (dx));
+          scalar_t ne = s * static_cast<scalar_t>((dy) * (1-dx));
+          scalar_t sw = s * static_cast<scalar_t>((1-dy) * (dx));
+          scalar_t se = s * static_cast<scalar_t>((1-dy) * (1-dx));
+
+          // if (iy > 0 && ix > 0 && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_nw][h1][w1] += nw;
+
+          // if (iy > 0 && ix < rd && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_ne][h1][w1] += ne;
+
+          // if (iy < rd && ix > 0 && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_sw][h1][w1] += sw;
+
+          // if (iy < rd && ix < rd && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_se][h1][w1] += se;
+
+
+          scalar_t* corr_ptr = &corr[b][n][0][h1][w1];
+
+          if (iy > 0 && ix > 0 && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_nw) += nw;
+
+          if (iy > 0 && ix < rd && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_ne) += ne;
+
+          if (iy < rd && ix > 0 && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_sw) += sw;
+
+          if (iy < rd && ix < rd && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_se) += se;
+
+
+        }
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void altcorr_backward_kernel(
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1,
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2,
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> coords,
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr_grad,
+  torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1_grad,
+  torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2_grad,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> coords_grad,
+  int r)
+{
+
+  const int b = blockIdx.x;
+  const int h0 = blockIdx.y * blockDim.x;
+  const int w0 = blockIdx.z * blockDim.y;
+  const int tid = threadIdx.x * blockDim.y + threadIdx.y;
+
+  const int H1 = fmap1.size(1);
+  const int W1 = fmap1.size(2);
+  const int H2 = fmap2.size(1);
+  const int W2 = fmap2.size(2);
+  const int N = coords.size(1);
+  const int C = fmap1.size(3);
+
+  __shared__ scalar_t f1[CHANNEL_STRIDE][BLOCK_HW+1];
+  __shared__ scalar_t f2[CHANNEL_STRIDE][BLOCK_HW+1];
+
+  __shared__ scalar_t f1_grad[CHANNEL_STRIDE][BLOCK_HW+1];
+  __shared__ scalar_t f2_grad[CHANNEL_STRIDE][BLOCK_HW+1];
+
+  __shared__ scalar_t x2s[BLOCK_HW];
+  __shared__ scalar_t y2s[BLOCK_HW];
+
+  for (int c=0; c<C; c+=CHANNEL_STRIDE) {
+
+    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+      int k1 = k + tid / CHANNEL_STRIDE;
+      int h1 = h0 + k1 / BLOCK_W;
+      int w1 = w0 + k1 % BLOCK_W;
+      int c1 = tid % CHANNEL_STRIDE;
+
+      auto fptr = fmap1[b][h1][w1];
+      if (within_bounds(h1, w1, H1, W1))
+        f1[c1][k1] = fptr[c+c1];
+      else
+        f1[c1][k1] = 0.0;
+
+      f1_grad[c1][k1] = 0.0;
+    }
+
+    __syncthreads();
+
+    int h1 = h0 + threadIdx.x;
+    int w1 = w0 + threadIdx.y;
+
+    for (int n=0; n<N; n++) {
+      x2s[tid] = coords[b][n][h1][w1][0];
+      y2s[tid] = coords[b][n][h1][w1][1];
+
+      scalar_t dx = x2s[tid] - floor(x2s[tid]);
+      scalar_t dy = y2s[tid] - floor(y2s[tid]);
+
+      int rd = 2*r + 1;
+      for (int iy=0; iy<rd+1; iy++) {
+        for (int ix=0; ix<rd+1; ix++) {
+          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+            int k1 = k + tid / CHANNEL_STRIDE;
+            int h2 = static_cast<int>(floor(y2s[k1]))-r+iy;
+            int w2 = static_cast<int>(floor(x2s[k1]))-r+ix;
+            int c2 = tid % CHANNEL_STRIDE;
+
+            auto fptr = fmap2[b][h2][w2];
+            if (within_bounds(h2, w2, H2, W2))
+              f2[c2][k1] = fptr[c+c2];
+            else
+              f2[c2][k1] = 0.0;
+
+            f2_grad[c2][k1] = 0.0;
+          }
+
+          __syncthreads();
+
+          const scalar_t* grad_ptr = &corr_grad[b][n][0][h1][w1];
+          scalar_t g = 0.0;
+
+          int ix_nw = H1*W1*((iy-1) + rd*(ix-1));
+          int ix_ne = H1*W1*((iy-1) + rd*ix);
+          int ix_sw = H1*W1*(iy + rd*(ix-1));
+          int ix_se = H1*W1*(iy + rd*ix);
+
+          if (iy > 0 && ix > 0 && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_nw) * dy * dx;
+
+          if (iy > 0 && ix < rd && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_ne) * dy * (1-dx);
+
+          if (iy < rd && ix > 0 && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_sw) * (1-dy) * dx;
+
+          if (iy < rd && ix < rd && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_se) * (1-dy) * (1-dx);
+
+          for (int k=0; k<CHANNEL_STRIDE; k++) {
+            f1_grad[k][tid] += g * f2[k][tid];
+            f2_grad[k][tid] += g * f1[k][tid];
+          }
+
+          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+            int k1 = k + tid / CHANNEL_STRIDE;
+            int h2 = static_cast<int>(floor(y2s[k1]))-r+iy;
+            int w2 = static_cast<int>(floor(x2s[k1]))-r+ix;
+            int c2 = tid % CHANNEL_STRIDE;
+
+            scalar_t* fptr = &fmap2_grad[b][h2][w2][0];
+            if (within_bounds(h2, w2, H2, W2))
+              atomicAdd(fptr+c+c2, f2_grad[c2][k1]);
+          }
+        }
+      }
+    }
+    __syncthreads();
+
+
+    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+      int k1 = k + tid / CHANNEL_STRIDE;
+      int h1 = h0 + k1 / BLOCK_W;
+      int w1 = w0 + k1 % BLOCK_W;
+      int c1 = tid % CHANNEL_STRIDE;
+
+      scalar_t* fptr = &fmap1_grad[b][h1][w1][0];
+      if (within_bounds(h1, w1, H1, W1))
+        fptr[c+c1] += f1_grad[c1][k1];
+    }
+  }
+}
+
+
+
+std::vector<torch::Tensor> altcorr_cuda_forward(
+  torch::Tensor fmap1,
+  torch::Tensor fmap2,
+  torch::Tensor coords,
+  int radius)
+{
+  const auto B = coords.size(0);
+  const auto N = coords.size(1);
+  const auto H = coords.size(2);
+  const auto W = coords.size(3);
+
+  const auto rd = 2 * radius + 1;
+  auto opts = fmap1.options();
+  auto corr = torch::zeros({B, N, rd*rd, H, W}, opts);
+
+  const dim3 blocks(B, (H+BLOCK_H-1)/BLOCK_H, (W+BLOCK_W-1)/BLOCK_W);
+  const dim3 threads(BLOCK_H, BLOCK_W);
+
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.type(), "altcorr_forward_kernel", ([&] {
+    altcorr_forward_kernel<scalar_t><<<blocks, threads>>>(
+      fmap1.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),
+      fmap2.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),
+      coords.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+      corr.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      radius);
+  }));
+
+  return {corr};
+}
+
+std::vector<torch::Tensor> altcorr_cuda_backward(
+  torch::Tensor fmap1,
+  torch::Tensor fmap2,
+  torch::Tensor coords,
+  torch::Tensor corr_grad,
+  int radius)
+{
+  const auto B = coords.size(0);
+  const auto N = coords.size(1);
+
+  const auto H1 = fmap1.size(1);
+  const auto W1 = fmap1.size(2);
+  const auto H2 = fmap2.size(1);
+  const auto W2 = fmap2.size(2);
+  const auto C = fmap1.size(3);
+
+  auto opts = fmap1.options();
+  auto fmap1_grad = torch::zeros({B, H1, W1, C}, opts);
+  auto fmap2_grad = torch::zeros({B, H2, W2, C}, opts);
+  auto coords_grad = torch::zeros({B, N, H1, W1, 2}, opts);
+
+  const dim3 blocks(B, (H1+BLOCK_H-1)/BLOCK_H, (W1+BLOCK_W-1)/BLOCK_W);
+  const dim3 threads(BLOCK_H, BLOCK_W);
+
+  altcorr_backward_kernel<float><<<blocks, threads>>>(
+    fmap1.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    fmap2.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    coords.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+    corr_grad.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+    fmap1_grad.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    fmap2_grad.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    coords_grad.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+    radius);
+
+  return {fmap1_grad, fmap2_grad, coords_grad};
+}
thirdparty/DROID-SLAM/src/correlation_kernels.cu ADDED
@@ -0,0 +1,185 @@
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <vector>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+
+
+#include <ATen/ATen.h>
+#include <ATen/NativeFunctions.h>
+#include <ATen/Parallel.h>
+
+#define BLOCK 16
+
+__forceinline__ __device__ bool within_bounds(int h, int w, int H, int W) {
+  return h >= 0 && h < H && w >= 0 && w < W;
+}
+
+template <typename scalar_t>
+__global__ void corr_index_forward_kernel(
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> volume,
+  const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr,
+  int r)
+{
+  // batch index
+  const int x = blockIdx.x * blockDim.x + threadIdx.x;
+  const int y = blockIdx.y * blockDim.y + threadIdx.y;
+  const int n = blockIdx.z;
+
+  const int h1 = volume.size(1);
+  const int w1 = volume.size(2);
+  const int h2 = volume.size(3);
+  const int w2 = volume.size(4);
+
+  if (!within_bounds(y, x, h1, w1)) {
+    return;
+  }
+
+  float x0 = coords[n][0][y][x];
+  float y0 = coords[n][1][y][x];
+
+  float dx = x0 - floor(x0);
+  float dy = y0 - floor(y0);
+
+  int rd = 2*r + 1;
+  for (int i=0; i<rd+1; i++) {
+    for (int j=0; j<rd+1; j++) {
+      int x1 = static_cast<int>(floor(x0)) - r + i;
+      int y1 = static_cast<int>(floor(y0)) - r + j;
+
+      if (within_bounds(y1, x1, h2, w2)) {
+        scalar_t s = volume[n][y][x][y1][x1];
+
+        if (i > 0 && j > 0)
+          corr[n][i-1][j-1][y][x] += s * scalar_t(dx * dy);
+
+        if (i > 0 && j < rd)
+          corr[n][i-1][j][y][x] += s * scalar_t(dx * (1.0f-dy));
+
+        if (i < rd && j > 0)
+          corr[n][i][j-1][y][x] += s * scalar_t((1.0f-dx) * dy);
+
+        if (i < rd && j < rd)
+          corr[n][i][j][y][x] += s * scalar_t((1.0f-dx) * (1.0f-dy));
+
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void corr_index_backward_kernel(
+  const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords,
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr_grad,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> volume_grad,
+  int r)
+{
+  // batch index
+  const int x = blockIdx.x * blockDim.x + threadIdx.x;
+  const int y = blockIdx.y * blockDim.y + threadIdx.y;
+  const int n = blockIdx.z;
+
+  const int h1 = volume_grad.size(1);
+  const int w1 = volume_grad.size(2);
+  const int h2 = volume_grad.size(3);
+  const int w2 = volume_grad.size(4);
+
+  if (!within_bounds(y, x, h1, w1)) {
+    return;
+  }
+
+  float x0 = coords[n][0][y][x];
+  float y0 = coords[n][1][y][x];
+
+  float dx = x0 - floor(x0);
+  float dy = y0 - floor(y0);
+
+  int rd = 2*r + 1;
+  for (int i=0; i<rd+1; i++) {
+    for (int j=0; j<rd+1; j++) {
+      int x1 = static_cast<int>(floor(x0)) - r + i;
+      int y1 = static_cast<int>(floor(y0)) - r + j;
+
+      if (within_bounds(y1, x1, h2, w2)) {
+        scalar_t g = 0.0;
+        if (i > 0 && j > 0)
+          g += corr_grad[n][i-1][j-1][y][x] * scalar_t(dx * dy);
+
+        if (i > 0 && j < rd)
+          g += corr_grad[n][i-1][j][y][x] * scalar_t(dx * (1.0f-dy));
+
+        if (i < rd && j > 0)
+          g += corr_grad[n][i][j-1][y][x] * scalar_t((1.0f-dx) * dy);
+
+        if (i < rd && j < rd)
+          g += corr_grad[n][i][j][y][x] * scalar_t((1.0f-dx) * (1.0f-dy));
+
+        volume_grad[n][y][x][y1][x1] += g;
+      }
+    }
+  }
+}
+
+std::vector<torch::Tensor> corr_index_cuda_forward(
+  torch::Tensor volume,
+  torch::Tensor coords,
+  int radius)
+{
+  const auto batch_size = volume.size(0);
+  const auto ht = volume.size(1);
+  const auto wd = volume.size(2);
+
+  const dim3 blocks((wd + BLOCK - 1) / BLOCK,
+                    (ht + BLOCK - 1) / BLOCK,
+                    batch_size);
+
+  const dim3 threads(BLOCK, BLOCK);
+
+  auto opts = volume.options();
+  torch::Tensor corr = torch::zeros(
+    {batch_size, 2*radius+1, 2*radius+1, ht, wd}, opts);
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.type(), "sampler_forward_kernel", ([&] {
+    corr_index_forward_kernel<scalar_t><<<blocks, threads>>>(
+      volume.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      coords.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+      corr.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      radius);
+  }));
+
+  return {corr};
+
+}
+
+std::vector<torch::Tensor> corr_index_cuda_backward(
+  torch::Tensor volume,
+  torch::Tensor coords,
+  torch::Tensor corr_grad,
+  int radius)
+{
+  const auto batch_size = volume.size(0);
+  const auto ht = volume.size(1);
+  const auto wd = volume.size(2);
+
+  auto volume_grad = torch::zeros_like(volume);
+
+  const dim3 blocks((wd + BLOCK - 1) / BLOCK,
+                    (ht + BLOCK - 1) / BLOCK,
+                    batch_size);
+
+  const dim3 threads(BLOCK, BLOCK);
+
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.type(), "sampler_backward_kernel", ([&] {
+    corr_index_backward_kernel<scalar_t><<<blocks, threads>>>(
+      coords.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+      corr_grad.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      volume_grad.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      radius);
+  }));
+
+  return {volume_grad};
+}