ThunderVVV committed on
Commit b7eedf7 · 1 Parent(s): 9480700

add thirdparty

This view is limited to 50 files because it contains too many changes. See the raw diff.
Files changed (50)
  1. .gitattributes +4 -0
  2. .gitignore +73 -0
  3. app.py +1 -1
  4. thirdparty/DROID-SLAM/.gitignore +158 -0
  5. thirdparty/DROID-SLAM/.gitmodules +6 -0
  6. thirdparty/DROID-SLAM/LICENSE +29 -0
  7. thirdparty/DROID-SLAM/README.md +139 -0
  8. thirdparty/DROID-SLAM/demo.py +135 -0
  9. thirdparty/DROID-SLAM/droid_slam/data_readers/__init__.py +1 -0
  10. thirdparty/DROID-SLAM/droid_slam/data_readers/augmentation.py +58 -0
  11. thirdparty/DROID-SLAM/droid_slam/data_readers/base.py +157 -0
  12. thirdparty/DROID-SLAM/droid_slam/data_readers/factory.py +82 -0
  13. thirdparty/DROID-SLAM/droid_slam/data_readers/rgbd_utils.py +190 -0
  14. thirdparty/DROID-SLAM/droid_slam/data_readers/stream.py +234 -0
  15. thirdparty/DROID-SLAM/droid_slam/data_readers/tartan.py +138 -0
  16. thirdparty/DROID-SLAM/droid_slam/data_readers/tartan_test.txt +32 -0
  17. thirdparty/DROID-SLAM/droid_slam/depth_video.py +197 -0
  18. thirdparty/DROID-SLAM/droid_slam/droid.py +102 -0
  19. thirdparty/DROID-SLAM/droid_slam/droid_backend.py +52 -0
  20. thirdparty/DROID-SLAM/droid_slam/droid_frontend.py +119 -0
  21. thirdparty/DROID-SLAM/droid_slam/droid_net.py +226 -0
  22. thirdparty/DROID-SLAM/droid_slam/factor_graph.py +397 -0
  23. thirdparty/DROID-SLAM/droid_slam/geom/__init__.py +0 -0
  24. thirdparty/DROID-SLAM/droid_slam/geom/ba.py +158 -0
  25. thirdparty/DROID-SLAM/droid_slam/geom/chol.py +73 -0
  26. thirdparty/DROID-SLAM/droid_slam/geom/graph_utils.py +113 -0
  27. thirdparty/DROID-SLAM/droid_slam/geom/losses.py +118 -0
  28. thirdparty/DROID-SLAM/droid_slam/geom/projective_ops.py +139 -0
  29. thirdparty/DROID-SLAM/droid_slam/logger.py +54 -0
  30. thirdparty/DROID-SLAM/droid_slam/modules/__init__.py +0 -0
  31. thirdparty/DROID-SLAM/droid_slam/modules/clipping.py +24 -0
  32. thirdparty/DROID-SLAM/droid_slam/modules/corr.py +140 -0
  33. thirdparty/DROID-SLAM/droid_slam/modules/extractor.py +198 -0
  34. thirdparty/DROID-SLAM/droid_slam/modules/gru.py +34 -0
  35. thirdparty/DROID-SLAM/droid_slam/motion_filter.py +92 -0
  36. thirdparty/DROID-SLAM/droid_slam/trajectory_filler.py +112 -0
  37. thirdparty/DROID-SLAM/droid_slam/vis_headless.py +185 -0
  38. thirdparty/DROID-SLAM/droid_slam/visualization.py +189 -0
  39. thirdparty/DROID-SLAM/environment.yaml +22 -0
  40. thirdparty/DROID-SLAM/environment_novis.yaml +20 -0
  41. thirdparty/DROID-SLAM/evaluation_scripts/test_eth3d.py +134 -0
  42. thirdparty/DROID-SLAM/evaluation_scripts/test_euroc.py +142 -0
  43. thirdparty/DROID-SLAM/evaluation_scripts/test_tum.py +123 -0
  44. thirdparty/DROID-SLAM/evaluation_scripts/validate_tartanair.py +115 -0
  45. thirdparty/DROID-SLAM/misc/DROID.png +3 -0
  46. thirdparty/DROID-SLAM/misc/renderoption.json +40 -0
  47. thirdparty/DROID-SLAM/misc/screenshot.png +3 -0
  48. thirdparty/DROID-SLAM/setup.py +61 -0
  49. thirdparty/DROID-SLAM/src/altcorr_kernel.cu +356 -0
  50. thirdparty/DROID-SLAM/src/correlation_kernels.cu +185 -0
.gitattributes CHANGED
@@ -35,3 +35,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.mp4 filter=lfs diff=lfs merge=lfs -text
37
  *.png filter=lfs diff=lfs merge=lfs -text
38
+ thirdparty/Metric3D/media/gifs/demo_1.gif filter=lfs diff=lfs merge=lfs -text
39
+ thirdparty/Metric3D/training/kitti_json_files/eigen_train.json filter=lfs diff=lfs merge=lfs -text
40
+ *.gif filter=lfs diff=lfs merge=lfs -text
41
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,73 @@
1
+ # Project specific data and submodule
2
+
3
+ /example/video_0
4
+ /weights
5
+
6
+ .vscode/
7
+ **/.DS_Store
8
+ data/pretrain/*.pth
9
+ data/pretrain/*.pth.tar
10
+ data/smpl/SMPL_*.pkl
11
+ *.mov
12
+ example_video/
13
+ /thirdparty/detection
14
+ experiments/
15
+ logs/
16
+ hot3d_*/
17
+ File/
18
+ thirdparty/ZoeDepth
19
+ eval_vis_mdslam/
20
+ eval_vis_*/
21
+ pred_vis/
22
+ _DATA.zip
23
+ train_ddp_process*
24
+ logs*
25
+ /*_trainset_export/
26
+ /*.png
27
+ /*.zip
28
+ /dataset_tars/
29
+ /dataset_untars/
30
+ /datasets/
31
+ /eval_log*/
32
+ *.pth
33
+ *.pkl
34
+ /dataset*/
35
+ /eval*/
36
+ /thirdparty/aitviewer
37
+
38
+ # Byte-compiled / optimized / DLL files
39
+ __pycache__/
40
+
41
+ # Distribution / packaging
42
+ .Python
43
+ build/
44
+ develop-eggs/
45
+ dist/
46
+ downloads/
47
+ eggs/
48
+ .eggs/
49
+ lib64/
50
+ parts/
51
+ sdist/
52
+ var/
53
+ wheels/
54
+ pip-wheel-metadata/
55
+ share/python-wheels/
56
+ *.egg-info/
57
+ .installed.cfg
58
+ *.egg
59
+ MANIFEST
60
+
61
+ # Jupyter Notebook
62
+ .ipynb_checkpoints
63
+ *.ipynb
64
+
65
+ # IPython
66
+ profile_default/
67
+ ipython_config.py
68
+
69
+ # pyenv
70
+ .python-version
71
+
72
+ vis.mp4
73
+ imgui.ini
app.py CHANGED
@@ -121,7 +121,7 @@ header = ('''
121
  <a href="" target="_blank" rel="noopener noreferrer">Jinglei Zhang</a><sup>1</sup>,
122
  <a href="https://jiankangdeng.github.io/" target="_blank" rel="noopener noreferrer">Jiankang Deng</a><sup>2</sup>,
123
  <br>
124
- <a href="https://scholar.google.com/citations?user=syoPhv8AAAAJ&hl=en" target="_blank" rel="noopener noreferrer">Chao Ma</a><sup>1</sup>
124
+ <a href="https://scholar.google.com/citations?user=syoPhv8AAAAJ&hl=en" target="_blank" rel="noopener noreferrer">Chao Ma</a><sup>1</sup>,
125
  <a href="https://rolpotamias.github.io" target="_blank" rel="noopener noreferrer">Rolandos Alexandros Potamias</a><sup>2</sup>
126
  </h3>
127
  <h3>
thirdparty/DROID-SLAM/.gitignore ADDED
@@ -0,0 +1,158 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
133
+
134
+ # pytype static type analyzer
135
+ .pytype/
136
+
137
+ # Cython debug symbols
138
+ cython_debug/
139
+
140
+
141
+
142
+ __pycache__
143
+ build
144
+ dist
145
+ *.egg-info
146
+ *.vscode/
147
+ *.pth
148
+ tests
149
+ checkpoints
150
+ datasets
151
+ runs
152
+ cache
153
+ *.out
154
+ *.o
155
+ data
156
+ figures/*.pdf
157
+
158
+
thirdparty/DROID-SLAM/.gitmodules ADDED
@@ -0,0 +1,6 @@
1
+ [submodule "thirdparty/lietorch"]
2
+ path = thirdparty/lietorch
3
+ url = https://github.com/princeton-vl/lietorch
4
+ [submodule "thirdparty/eigen"]
5
+ path = thirdparty/eigen
6
+ url = https://gitlab.com/libeigen/eigen.git
thirdparty/DROID-SLAM/LICENSE ADDED
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2021, Princeton Vision & Learning Lab
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
thirdparty/DROID-SLAM/README.md ADDED
@@ -0,0 +1,139 @@
1
+ # DROID-SLAM
2
+
3
+
4
+ <!-- <center><img src="misc/DROID.png" width="640" style="center"></center> -->
5
+
6
+
7
+ [![IMAGE ALT TEXT HERE](misc/screenshot.png)](https://www.youtube.com/watch?v=GG78CSlSHSA)
8
+
9
+
10
+
11
+ [DROID-SLAM: Deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras](https://arxiv.org/abs/2108.10869)
12
+ Zachary Teed and Jia Deng
13
+
14
+ ```
15
+ @article{teed2021droid,
16
+ title={{DROID-SLAM: Deep Visual SLAM for Monocular, Stereo, and RGB-D Cameras}},
17
+ author={Teed, Zachary and Deng, Jia},
18
+ journal={Advances in neural information processing systems},
19
+ year={2021}
20
+ }
21
+ ```
22
+
23
+ **Initial Code Release:** This repo currently provides a single-GPU implementation of our monocular, stereo, and RGB-D SLAM systems, along with demo, training, and evaluation scripts.
24
+
25
+
26
+ ## Requirements
27
+
28
+ To run the code you will need:
29
+ * **Inference:** Running the demos will require a GPU with at least 11G of memory.
30
+
31
+ * **Training:** Training requires a GPU with at least 24G of memory. We train on 4 x RTX-3090 GPUs.
32
+
33
+ ## Getting Started
34
+ 1. Clone the repo using the `--recursive` flag
35
+ ```Bash
36
+ git clone --recursive https://github.com/princeton-vl/DROID-SLAM.git
37
+ ```
38
+
39
+ 2. Create a new anaconda environment using the provided .yaml file. Use `environment_novis.yaml` if you do not want to use the visualization.
40
+ ```Bash
41
+ conda env create -f environment.yaml
42
+ pip install evo --upgrade --no-binary evo
43
+ pip install gdown
44
+ ```
45
+
46
+ 3. Compile the extensions (takes about 10 minutes)
47
+ ```Bash
48
+ python setup.py install
49
+ ```
50
+
51
+
52
+ ## Demos
53
+
54
+ 1. Download the model from Google Drive: [droid.pth](https://drive.google.com/file/d/1PpqVt1H4maBa_GbPJp4NwxRsd9jk-elh/view?usp=sharing)
55
+
56
+ 2. Download some sample videos using the provided script.
57
+ ```Bash
58
+ ./tools/download_sample_data.sh
59
+ ```
60
+
61
+ Run the demo on any of the samples (all demos can be run on a GPU with 11G of memory). While running, press the "s" key to increase the filtering threshold (= more points) and "a" to decrease the filtering threshold (= fewer points). To save the reconstruction with full resolution depth maps use the `--reconstruction_path` flag.
62
+
63
+
64
+ ```Python
65
+ python demo.py --imagedir=data/abandonedfactory --calib=calib/tartan.txt --stride=2
66
+ ```
67
+
68
+ ```Python
69
+ python demo.py --imagedir=data/sfm_bench/rgb --calib=calib/eth.txt
70
+ ```
71
+
72
+ ```Python
73
+ python demo.py --imagedir=data/Barn --calib=calib/barn.txt --stride=1 --backend_nms=4
74
+ ```
75
+
76
+ ```Python
77
+ python demo.py --imagedir=data/mav0/cam0/data --calib=calib/euroc.txt --t0=150
78
+ ```
79
+
80
+ ```Python
81
+ python demo.py --imagedir=data/rgbd_dataset_freiburg3_cabinet/rgb --calib=calib/tum3.txt
82
+ ```
83
+
84
+
85
+ **Running on your own data:** All you need is a calibration file. Calibration files are in the form
86
+ ```
87
+ fx fy cx cy [k1 k2 p1 p2 [ k3 [ k4 k5 k6 ]]]
88
+ ```
89
+ with parameters in brackets optional.
90
+
91
+ ## Evaluation
92
+ We provide evaluation scripts for TartanAir, EuRoC, and TUM. EuRoC and TUM can be run on a 1080Ti. The TartanAir and ETH3D evaluations will require 24G of memory.
93
+
94
+ ### TartanAir (Mono + Stereo)
95
+ Download the [TartanAir](https://theairlab.org/tartanair-dataset/) dataset using the script `thirdparty/tartanair_tools/download_training.py` and put them in `datasets/TartanAir`
96
+ ```Bash
97
+ ./tools/validate_tartanair.sh --plot_curve # monocular eval
98
+ ./tools/validate_tartanair.sh --plot_curve --stereo # stereo eval
99
+ ```
100
+
101
+ ### EuRoC (Mono + Stereo)
102
+ Download the [EuRoC](https://projects.asl.ethz.ch/datasets/doku.php?id=kmavvisualinertialdatasets) sequences (ASL format) and put them in `datasets/EuRoC`
103
+ ```Bash
104
+ ./tools/evaluate_euroc.sh # monocular eval
105
+ ./tools/evaluate_euroc.sh --stereo # stereo eval
106
+ ```
107
+
108
+ ### TUM-RGBD (Mono)
109
+ Download the fr1 sequences from [TUM-RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) and put them in `datasets/TUM-RGBD`
110
+ ```Bash
111
+ ./tools/evaluate_tum.sh # monocular eval
112
+ ```
113
+
114
+ ### ETH3D (RGB-D)
115
+ Download the [ETH3D](https://www.eth3d.net/slam_datasets) dataset
116
+ ```Bash
117
+ ./tools/evaluate_eth3d.sh # RGB-D eval
118
+ ```
119
+
120
+ ## Training
121
+
122
+ First download the TartanAir dataset. The download script can be found in `thirdparty/tartanair_tools/download_training.py`. You will only need the `rgb` and `depth` data.
123
+
124
+ ```
125
+ python download_training.py --rgb --depth
126
+ ```
127
+
128
+ You can then run the training script. We use 4 x RTX-3090 GPUs for training, which takes approximately 1 week. If you use a different number of GPUs, adjust the learning rate accordingly.
129
+
130
+ **Note:** On the first training run, covisibility is computed between all pairs of frames. This can take several hours, but the results are cached so that future training runs will start immediately.
131
+
132
+
133
+ ```
134
+ python train.py --datapath=<path to tartanair> --gpus=4 --lr=0.00025
135
+ ```
136
+
137
+
138
+ ## Acknowledgements
139
+ Data from [TartanAir](https://theairlab.org/tartanair-dataset/) was used to train our model. We additionally use evaluation tools from [evo](https://github.com/MichaelGrupp/evo) and [tartanair_tools](https://github.com/castacks/tartanair_tools).
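As a companion to the calibration format above, here is a minimal parsing sketch (not one of the committed files): it turns an `fx fy cx cy [k1 k2 p1 p2 [k3 ...]]` line into a 3x3 intrinsics matrix plus an optional distortion vector, mirroring how `image_stream` in `demo.py` below consumes `calib[:4]` and `calib[4:]`. The helper name and the example path are illustrative only.

```python
# Illustrative sketch (not one of the committed files): parse a DROID-SLAM calibration
# file of the form "fx fy cx cy [k1 k2 p1 p2 [k3 ...]]" into a 3x3 intrinsics matrix K
# and an optional distortion vector, as image_stream() in demo.py expects.
import numpy as np

def load_calib(path):
    calib = np.loadtxt(path, delimiter=" ")       # one line of whitespace-separated floats
    fx, fy, cx, cy = calib[:4]
    K = np.eye(3)
    K[0, 0], K[1, 1] = fx, fy                     # focal lengths
    K[0, 2], K[1, 2] = cx, cy                     # principal point
    dist = calib[4:] if calib.size > 4 else None  # distortion coefficients are optional
    return K, dist

# K, dist = load_calib("calib/tartan.txt")        # path taken from the demo commands above
```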
thirdparty/DROID-SLAM/demo.py ADDED
@@ -0,0 +1,135 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ from torch.multiprocessing import Process
15
+ from droid import Droid
16
+
17
+ from pycocotools import mask as masktool
18
+ import torch.nn.functional as F
19
+
20
+
21
+ def show_image(image):
22
+ image = image.permute(1, 2, 0).cpu().numpy()
23
+ cv2.imshow('image', image / 255.0)
24
+ cv2.waitKey(1)
25
+
26
+ def image_stream(imagedir, calib, stride):
27
+ """ image generator """
28
+ # calib = np.loadtxt(calib, delimiter=" ")
29
+ fx, fy, cx, cy = calib[:4]
30
+
31
+ K = np.eye(3)
32
+ K[0,0] = fx
33
+ K[0,2] = cx
34
+ K[1,1] = fy
35
+ K[1,2] = cy
36
+
37
+ image_list = sorted(glob.glob(f'{imagedir}/*.jpg'))
38
+ image_list = image_list[::stride]
39
+
40
+ for t, imfile in enumerate(image_list):
41
+ image = cv2.imread(imfile)
42
+ if len(calib) > 4:
43
+ image = cv2.undistort(image, K, calib[4:])
44
+
45
+ h0, w0, _ = image.shape
46
+ h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
47
+ w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
48
+
49
+ image = cv2.resize(image, (w1, h1))
50
+ image = image[:h1-h1%8, :w1-w1%8]
51
+ image = torch.as_tensor(image).permute(2, 0, 1)
52
+
53
+ intrinsics = torch.as_tensor([fx, fy, cx, cy])
54
+ intrinsics[0::2] *= (w1 / w0)
55
+ intrinsics[1::2] *= (h1 / h0)
56
+
57
+ yield t, image[None], intrinsics
58
+
59
+
60
+ def save_reconstruction(droid, reconstruction_path):
61
+
62
+ from pathlib import Path
63
+ import random
64
+ import string
65
+
66
+ t = droid.video.counter.value
67
+ tstamps = droid.video.tstamp[:t].cpu().numpy()
68
+ images = droid.video.images[:t].cpu().numpy()
69
+ disps = droid.video.disps_up[:t].cpu().numpy()
70
+ poses = droid.video.poses[:t].cpu().numpy()
71
+ intrinsics = droid.video.intrinsics[:t].cpu().numpy()
72
+
73
+ Path("reconstructions/{}".format(reconstruction_path)).mkdir(parents=True, exist_ok=True)
74
+ np.save("reconstructions/{}/tstamps.npy".format(reconstruction_path), tstamps)
75
+ np.save("reconstructions/{}/images.npy".format(reconstruction_path), images)
76
+ np.save("reconstructions/{}/disps.npy".format(reconstruction_path), disps)
77
+ np.save("reconstructions/{}/poses.npy".format(reconstruction_path), poses)
78
+ np.save("reconstructions/{}/intrinsics.npy".format(reconstruction_path), intrinsics)
79
+
80
+
81
+ if __name__ == '__main__':
82
+ parser = argparse.ArgumentParser()
83
+ parser.add_argument("--imagedir", type=str, help="path to image directory")
84
+ parser.add_argument("--calib", type=str, help="path to calibration file")
85
+ parser.add_argument("--t0", default=0, type=int, help="starting frame")
86
+ parser.add_argument("--stride", default=3, type=int, help="frame stride")
87
+
88
+ parser.add_argument("--weights", default="droid.pth")
89
+ parser.add_argument("--buffer", type=int, default=512)
90
+ parser.add_argument("--image_size", default=[240, 320])
91
+ parser.add_argument("--disable_vis", action="store_true")
92
+
93
+ parser.add_argument("--beta", type=float, default=0.3, help="weight for translation / rotation components of flow")
94
+ parser.add_argument("--filter_thresh", type=float, default=2.4, help="how much motion before considering new keyframe")
95
+ parser.add_argument("--warmup", type=int, default=8, help="number of warmup frames")
96
+ parser.add_argument("--keyframe_thresh", type=float, default=4.0, help="threshold to create a new keyframe")
97
+ parser.add_argument("--frontend_thresh", type=float, default=16.0, help="add edges between frames whithin this distance")
98
+ parser.add_argument("--frontend_window", type=int, default=25, help="frontend optimization window")
99
+ parser.add_argument("--frontend_radius", type=int, default=2, help="force edges between frames within radius")
100
+ parser.add_argument("--frontend_nms", type=int, default=1, help="non-maximal supression of edges")
101
+
102
+ parser.add_argument("--backend_thresh", type=float, default=22.0)
103
+ parser.add_argument("--backend_radius", type=int, default=2)
104
+ parser.add_argument("--backend_nms", type=int, default=3)
105
+ parser.add_argument("--upsample", action="store_true")
106
+ parser.add_argument("--reconstruction_path", help="path to saved reconstruction")
107
+ args = parser.parse_args()
108
+
109
+ args.stereo = False
110
+ torch.multiprocessing.set_start_method('spawn')
111
+
112
+ droid = None
113
+
114
+ # need high resolution depths
115
+ if args.reconstruction_path is not None:
116
+ args.upsample = True
117
+
118
+ tstamps = []
119
+ for (t, image, intrinsics) in tqdm(image_stream(args.imagedir, args.calib, args.stride)):
120
+ if t < args.t0:
121
+ continue
122
+
123
+ if not args.disable_vis:
124
+ show_image(image[0])
125
+
126
+ if droid is None:
127
+ args.image_size = [image.shape[2], image.shape[3]]
128
+ droid = Droid(args)
129
+
130
+ droid.track(t, image, intrinsics=intrinsics)
131
+
132
+ if args.reconstruction_path is not None:
133
+ save_reconstruction(droid, args.reconstruction_path)
134
+
135
+ traj_est = droid.terminate(image_stream(args.imagedir, args.calib, args.stride))
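`save_reconstruction` in `demo.py` above writes five NumPy arrays under `reconstructions/<name>/`. A minimal sketch for loading them back (not one of the committed files); the array names come from the `np.save` calls, while reading `poses` as `tx ty tz qx qy qz qw` 7-vectors follows the lietorch SE3 convention used elsewhere in this repo and is an assumption here.

```python
# Illustrative sketch (not one of the committed files): reload the arrays written by
# save_reconstruction() in demo.py. File names match the np.save calls above.
import numpy as np

def load_reconstruction(name):
    root = f"reconstructions/{name}"
    keys = ("tstamps", "images", "disps", "poses", "intrinsics")
    return {k: np.load(f"{root}/{k}.npy") for k in keys}

# rec = load_reconstruction("my_scene")
# rec["disps"]  -> (num_keyframes, H, W) upsampled disparities (--upsample is forced on)
# rec["poses"]  -> (num_keyframes, 7), assumed tx ty tz qx qy qz qw
```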
thirdparty/DROID-SLAM/droid_slam/data_readers/__init__.py ADDED
@@ -0,0 +1 @@
1
+
thirdparty/DROID-SLAM/droid_slam/data_readers/augmentation.py ADDED
@@ -0,0 +1,58 @@
1
+ import torch
2
+ import torchvision.transforms as transforms
3
+ import numpy as np
4
+ import torch.nn.functional as F
5
+
6
+
7
+ class RGBDAugmentor:
8
+ """ perform augmentation on RGB-D video """
9
+
10
+ def __init__(self, crop_size):
11
+ self.crop_size = crop_size
12
+ self.augcolor = transforms.Compose([
13
+ transforms.ToPILImage(),
14
+ transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.4/3.14),
15
+ transforms.RandomGrayscale(p=0.1),
16
+ transforms.ToTensor()])
17
+
18
+ self.max_scale = 0.25
19
+
20
+ def spatial_transform(self, images, depths, poses, intrinsics):
21
+ """ cropping and resizing """
22
+ ht, wd = images.shape[2:]
23
+
24
+ max_scale = self.max_scale
25
+ min_scale = np.log2(np.maximum(
26
+ (self.crop_size[0] + 1) / float(ht),
27
+ (self.crop_size[1] + 1) / float(wd)))
28
+
29
+ scale = 2 ** np.random.uniform(min_scale, max_scale)
30
+ intrinsics = scale * intrinsics
31
+ depths = depths.unsqueeze(dim=1)
32
+
33
+ images = F.interpolate(images, scale_factor=scale, mode='bilinear',
34
+ align_corners=False, recompute_scale_factor=True)
35
+
36
+ depths = F.interpolate(depths, scale_factor=scale, recompute_scale_factor=True)
37
+
38
+ # always perform center crop (TODO: try non-center crops)
39
+ y0 = (images.shape[2] - self.crop_size[0]) // 2
40
+ x0 = (images.shape[3] - self.crop_size[1]) // 2
41
+
42
+ intrinsics = intrinsics - torch.tensor([0.0, 0.0, x0, y0])
43
+ images = images[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
44
+ depths = depths[:, :, y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
45
+
46
+ depths = depths.squeeze(dim=1)
47
+ return images, poses, depths, intrinsics
48
+
49
+ def color_transform(self, images):
50
+ """ color jittering """
51
+ num, ch, ht, wd = images.shape
52
+ images = images.permute(1, 2, 3, 0).reshape(ch, ht, wd*num)
53
+ images = 255 * self.augcolor(images[[2,1,0]] / 255.0)
54
+ return images[[2,1,0]].reshape(ch, ht, wd, num).permute(3,0,1,2).contiguous()
55
+
56
+ def __call__(self, images, poses, depths, intrinsics):
57
+ images = self.color_transform(images)
58
+ return self.spatial_transform(images, depths, poses, intrinsics)
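A small usage sketch for the `RGBDAugmentor` above (not one of the committed files). Shapes follow the class itself: images `(N, 3, H, W)`, depths `(N, H, W)`, intrinsics `(N, 4)` as `fx fy cx cy`; poses pass through unchanged, so the 7-vector shape is only carried over from the rest of the repo. Importing `augmentation` directly assumes `data_readers/` is on `sys.path`.

```python
# Illustrative sketch (not one of the committed files): run RGBDAugmentor on dummy data.
import torch
from augmentation import RGBDAugmentor   # assumes data_readers/ is on sys.path

aug = RGBDAugmentor(crop_size=[384, 512])

images = torch.randint(0, 255, (2, 3, 480, 640)).float()   # (N, 3, H, W), BGR in 0-255
depths = torch.rand(2, 480, 640) + 0.5                      # (N, H, W), strictly positive
poses = torch.zeros(2, 7); poses[:, 6] = 1.0                # identity tx ty tz qx qy qz qw (assumed layout)
intrinsics = torch.tensor([[320.0, 320.0, 320.0, 240.0]] * 2)

# __call__ returns (images, poses, depths, intrinsics) after jitter, rescale and center crop
images, poses, depths, intrinsics = aug(images, poses, depths, intrinsics)
print(images.shape)   # torch.Size([2, 3, 384, 512])
```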
thirdparty/DROID-SLAM/droid_slam/data_readers/base.py ADDED
@@ -0,0 +1,157 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import torch.nn.functional as F
6
+
7
+ import csv
8
+ import os
9
+ import cv2
10
+ import math
11
+ import random
12
+ import json
13
+ import pickle
14
+ import os.path as osp
15
+
16
+ from .augmentation import RGBDAugmentor
17
+ from .rgbd_utils import *
18
+
19
+ class RGBDDataset(data.Dataset):
20
+ def __init__(self, name, datapath, n_frames=4, crop_size=[384,512], fmin=8.0, fmax=75.0, do_aug=True):
21
+ """ Base class for RGBD dataset """
22
+ self.aug = None
23
+ self.root = datapath
24
+ self.name = name
25
+
26
+ self.n_frames = n_frames
27
+ self.fmin = fmin # exclude very easy examples
28
+ self.fmax = fmax # exclude very hard examples
29
+
30
+ if do_aug:
31
+ self.aug = RGBDAugmentor(crop_size=crop_size)
32
+
33
+ # building dataset is expensive, cache so only needs to be performed once
34
+ cur_path = osp.dirname(osp.abspath(__file__))
35
+ if not os.path.isdir(osp.join(cur_path, 'cache')):
36
+ os.mkdir(osp.join(cur_path, 'cache'))
37
+
38
+ cache_path = osp.join(cur_path, 'cache', '{}.pickle'.format(self.name))
39
+
40
+ if osp.isfile(cache_path):
41
+ scene_info = pickle.load(open(cache_path, 'rb'))[0]
42
+ else:
43
+ scene_info = self._build_dataset()
44
+ with open(cache_path, 'wb') as cachefile:
45
+ pickle.dump((scene_info,), cachefile)
46
+
47
+ self.scene_info = scene_info
48
+ self._build_dataset_index()
49
+
50
+ def _build_dataset_index(self):
51
+ self.dataset_index = []
52
+ for scene in self.scene_info:
53
+ if not self.__class__.is_test_scene(scene):
54
+ graph = self.scene_info[scene]['graph']
55
+ for i in graph:
56
+ if len(graph[i][0]) > self.n_frames:
57
+ self.dataset_index.append((scene, i))
58
+ else:
59
+ print("Reserving {} for validation".format(scene))
60
+
61
+ @staticmethod
62
+ def image_read(image_file):
63
+ return cv2.imread(image_file)
64
+
65
+ @staticmethod
66
+ def depth_read(depth_file):
67
+ return np.load(depth_file)
68
+
69
+ def build_frame_graph(self, poses, depths, intrinsics, f=16, max_flow=256):
70
+ """ compute optical flow distance between all pairs of frames """
71
+ def read_disp(fn):
72
+ depth = self.__class__.depth_read(fn)[f//2::f, f//2::f]
73
+ depth[depth < 0.01] = np.mean(depth)
74
+ return 1.0 / depth
75
+
76
+ poses = np.array(poses)
77
+ intrinsics = np.array(intrinsics) / f
78
+
79
+ disps = np.stack(list(map(read_disp, depths)), 0)
80
+ d = f * compute_distance_matrix_flow(poses, disps, intrinsics)
81
+
82
+ # uncomment for nice visualization
83
+ # import matplotlib.pyplot as plt
84
+ # plt.imshow(d)
85
+ # plt.show()
86
+
87
+ graph = {}
88
+ for i in range(d.shape[0]):
89
+ j, = np.where(d[i] < max_flow)
90
+ graph[i] = (j, d[i,j])
91
+
92
+ return graph
93
+
94
+ def __getitem__(self, index):
95
+ """ return training video """
96
+
97
+ index = index % len(self.dataset_index)
98
+ scene_id, ix = self.dataset_index[index]
99
+
100
+ frame_graph = self.scene_info[scene_id]['graph']
101
+ images_list = self.scene_info[scene_id]['images']
102
+ depths_list = self.scene_info[scene_id]['depths']
103
+ poses_list = self.scene_info[scene_id]['poses']
104
+ intrinsics_list = self.scene_info[scene_id]['intrinsics']
105
+
106
+ inds = [ ix ]
107
+ while len(inds) < self.n_frames:
108
+ # get other frames within flow threshold
109
+ k = (frame_graph[ix][1] > self.fmin) & (frame_graph[ix][1] < self.fmax)
110
+ frames = frame_graph[ix][0][k]
111
+
112
+ # prefer frames forward in time
113
+ if np.count_nonzero(frames[frames > ix]):
114
+ ix = np.random.choice(frames[frames > ix])
115
+
116
+ elif np.count_nonzero(frames):
117
+ ix = np.random.choice(frames)
118
+
119
+ inds += [ ix ]
120
+
121
+ images, depths, poses, intrinsics = [], [], [], []
122
+ for i in inds:
123
+ images.append(self.__class__.image_read(images_list[i]))
124
+ depths.append(self.__class__.depth_read(depths_list[i]))
125
+ poses.append(poses_list[i])
126
+ intrinsics.append(intrinsics_list[i])
127
+
128
+ images = np.stack(images).astype(np.float32)
129
+ depths = np.stack(depths).astype(np.float32)
130
+ poses = np.stack(poses).astype(np.float32)
131
+ intrinsics = np.stack(intrinsics).astype(np.float32)
132
+
133
+ images = torch.from_numpy(images).float()
134
+ images = images.permute(0, 3, 1, 2)
135
+
136
+ disps = torch.from_numpy(1.0 / depths)
137
+ poses = torch.from_numpy(poses)
138
+ intrinsics = torch.from_numpy(intrinsics)
139
+
140
+ if self.aug is not None:
141
+ images, poses, disps, intrinsics = \
142
+ self.aug(images, poses, disps, intrinsics)
143
+
144
+ # scale scene
145
+ if len(disps[disps>0.01]) > 0:
146
+ s = disps[disps>0.01].mean()
147
+ disps = disps / s
148
+ poses[...,:3] *= s
149
+
150
+ return images, poses, disps, intrinsics
151
+
152
+ def __len__(self):
153
+ return len(self.dataset_index)
154
+
155
+ def __imul__(self, x):
156
+ self.dataset_index *= x
157
+ return self
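A toy illustration (not one of the committed files) of the frame graph consumed by `__getitem__` above: `build_frame_graph` maps each frame index to (co-visible frame indices, mean flow magnitudes), and sampling keeps only neighbours whose flow lies between `fmin` and `fmax`, i.e. neither very easy nor very hard. The numbers below are invented.

```python
# Illustrative sketch (not one of the committed files): how fmin/fmax filter the frame graph.
import numpy as np

graph = {0: (np.array([1, 2, 3]), np.array([5.0, 20.0, 90.0]))}  # made-up flow magnitudes
fmin, fmax = 8.0, 75.0                                           # defaults in RGBDDataset

frames, flow = graph[0]
keep = (flow > fmin) & (flow < fmax)
print(frames[keep])   # [2] -- frame 1 is too easy (flow 5), frame 3 too hard (flow 90)
```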
thirdparty/DROID-SLAM/droid_slam/data_readers/factory.py ADDED
@@ -0,0 +1,82 @@
1
+
2
+ import pickle
3
+ import os
4
+ import os.path as osp
5
+
6
+ # RGBD-Dataset
7
+ from .tartan import TartanAir
8
+
9
+ from .stream import ImageStream
10
+ from .stream import StereoStream
11
+ from .stream import RGBDStream
12
+
13
+ # streaming datasets for inference
14
+ from .tartan import TartanAirStream
15
+ from .tartan import TartanAirTestStream
16
+
17
+ def dataset_factory(dataset_list, **kwargs):
18
+ """ create a combined dataset """
19
+
20
+ from torch.utils.data import ConcatDataset
21
+
22
+ dataset_map = { 'tartan': (TartanAir, ) }
23
+ db_list = []
24
+ for key in dataset_list:
25
+ # cache datasets for faster future loading
26
+ db = dataset_map[key][0](**kwargs)
27
+
28
+ print("Dataset {} has {} images".format(key, len(db)))
29
+ db_list.append(db)
30
+
31
+ return ConcatDataset(db_list)
32
+
33
+
34
+ def create_datastream(dataset_path, **kwargs):
35
+ """ create data_loader to stream images 1 by 1 """
36
+
37
+ from torch.utils.data import DataLoader
38
+
39
+ if osp.isfile(osp.join(dataset_path, 'calibration.txt')):
40
+ db = ETH3DStream(dataset_path, **kwargs)
41
+
42
+ elif osp.isdir(osp.join(dataset_path, 'image_left')):
43
+ db = TartanAirStream(dataset_path, **kwargs)
44
+
45
+ elif osp.isfile(osp.join(dataset_path, 'rgb.txt')):
46
+ db = TUMStream(dataset_path, **kwargs)
47
+
48
+ elif osp.isdir(osp.join(dataset_path, 'mav0')):
49
+ db = EurocStream(dataset_path, **kwargs)
50
+
51
+ elif osp.isfile(osp.join(dataset_path, 'calib.txt')):
52
+ db = KITTIStream(dataset_path, **kwargs)
53
+
54
+ else:
55
+ # db = TartanAirStream(dataset_path, **kwargs)
56
+ db = TartanAirTestStream(dataset_path, **kwargs)
57
+
58
+ stream = DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
59
+ return stream
60
+
61
+
62
+ def create_imagestream(dataset_path, **kwargs):
63
+ """ create data_loader to stream images 1 by 1 """
64
+ from torch.utils.data import DataLoader
65
+
66
+ db = ImageStream(dataset_path, **kwargs)
67
+ return DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
68
+
69
+ def create_stereostream(dataset_path, **kwargs):
70
+ """ create data_loader to stream images 1 by 1 """
71
+ from torch.utils.data import DataLoader
72
+
73
+ db = StereoStream(dataset_path, **kwargs)
74
+ return DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
75
+
76
+ def create_rgbdstream(dataset_path, **kwargs):
77
+ """ create data_loader to stream images 1 by 1 """
78
+ from torch.utils.data import DataLoader
79
+
80
+ db = RGBDStream(dataset_path, **kwargs)
81
+ return DataLoader(db, shuffle=False, batch_size=1, num_workers=4)
82
+
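A hedged usage sketch for `dataset_factory` above (not one of the committed files). `'tartan'` is the only key registered in `dataset_map`, and keyword arguments are forwarded through `TartanAir` to `RGBDDataset.__init__`; the concrete values and the `sys.path` assumption below are illustrative, not the training defaults.

```python
# Illustrative sketch (not one of the committed files): build a training set with
# dataset_factory(); kwargs are forwarded to RGBDDataset.__init__. Assumes droid_slam/
# is on sys.path and the TartanAir data is on disk under datasets/TartanAir.
from torch.utils.data import DataLoader
from data_readers.factory import dataset_factory

db = dataset_factory(['tartan'], datapath='datasets/TartanAir',
                     n_frames=7, fmin=8.0, fmax=96.0)     # example values, not defaults
loader = DataLoader(db, batch_size=1, shuffle=True, num_workers=2)
# each item: (images, poses, disps, intrinsics) from RGBDDataset.__getitem__
```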
thirdparty/DROID-SLAM/droid_slam/data_readers/rgbd_utils.py ADDED
@@ -0,0 +1,190 @@
1
+ import numpy as np
2
+ import os.path as osp
3
+
4
+ import torch
5
+ from lietorch import SE3
6
+
7
+ import geom.projective_ops as pops
8
+ from scipy.spatial.transform import Rotation
9
+
10
+
11
+ def parse_list(filepath, skiprows=0):
12
+ """ read list data """
13
+ data = np.loadtxt(filepath, delimiter=' ', dtype=np.unicode_, skiprows=skiprows)
14
+ return data
15
+
16
+ def associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=1.0):
17
+ """ pair images, depths, and poses """
18
+ associations = []
19
+ for i, t in enumerate(tstamp_image):
20
+ if tstamp_pose is None:
21
+ j = np.argmin(np.abs(tstamp_depth - t))
22
+ if (np.abs(tstamp_depth[j] - t) < max_dt):
23
+ associations.append((i, j))
24
+
25
+ else:
26
+ j = np.argmin(np.abs(tstamp_depth - t))
27
+ k = np.argmin(np.abs(tstamp_pose - t))
28
+
29
+ if (np.abs(tstamp_depth[j] - t) < max_dt) and \
30
+ (np.abs(tstamp_pose[k] - t) < max_dt):
31
+ associations.append((i, j, k))
32
+
33
+ return associations
34
+
35
+ def loadtum(datapath, frame_rate=-1):
36
+ """ read video data in tum-rgbd format """
37
+ if osp.isfile(osp.join(datapath, 'groundtruth.txt')):
38
+ pose_list = osp.join(datapath, 'groundtruth.txt')
39
+
40
+ elif osp.isfile(osp.join(datapath, 'pose.txt')):
41
+ pose_list = osp.join(datapath, 'pose.txt')
42
+
43
+ else:
44
+ return None, None, None, None
45
+
46
+ image_list = osp.join(datapath, 'rgb.txt')
47
+ depth_list = osp.join(datapath, 'depth.txt')
48
+
49
+ calib_path = osp.join(datapath, 'calibration.txt')
50
+ intrinsic = None
51
+ if osp.isfile(calib_path):
52
+ intrinsic = np.loadtxt(calib_path, delimiter=' ')
53
+ intrinsic = intrinsic.astype(np.float64)
54
+
55
+ image_data = parse_list(image_list)
56
+ depth_data = parse_list(depth_list)
57
+ pose_data = parse_list(pose_list, skiprows=1)
58
+ pose_vecs = pose_data[:,1:].astype(np.float64)
59
+
60
+ tstamp_image = image_data[:,0].astype(np.float64)
61
+ tstamp_depth = depth_data[:,0].astype(np.float64)
62
+ tstamp_pose = pose_data[:,0].astype(np.float64)
63
+ associations = associate_frames(tstamp_image, tstamp_depth, tstamp_pose)
64
+
65
+ # print(len(tstamp_image))
66
+ # print(len(associations))
67
+
68
+ indicies = range(len(associations))[::5]
69
+
70
+ # indicies = [ 0 ]
71
+ # for i in range(1, len(associations)):
72
+ # t0 = tstamp_image[associations[indicies[-1]][0]]
73
+ # t1 = tstamp_image[associations[i][0]]
74
+ # if t1 - t0 > 1.0 / frame_rate:
75
+ # indicies += [ i ]
76
+
77
+ images, poses, depths, intrinsics, tstamps = [], [], [], [], []
78
+ for ix in indicies:
79
+ (i, j, k) = associations[ix]
80
+ images += [ osp.join(datapath, image_data[i,1]) ]
81
+ depths += [ osp.join(datapath, depth_data[j,1]) ]
82
+ poses += [ pose_vecs[k] ]
83
+ tstamps += [ tstamp_image[i] ]
84
+
85
+ if intrinsic is not None:
86
+ intrinsics += [ intrinsic ]
87
+
88
+ return images, depths, poses, intrinsics, tstamps
89
+
90
+
91
+ def all_pairs_distance_matrix(poses, beta=2.5):
92
+ """ compute distance matrix between all pairs of poses """
93
+ poses = np.array(poses, dtype=np.float32)
94
+ poses[:,:3] *= beta # scale to balance rot + trans
95
+ poses = SE3(torch.from_numpy(poses))
96
+
97
+ r = (poses[:,None].inv() * poses[None,:]).log()
98
+ return r.norm(dim=-1).cpu().numpy()
99
+
100
+ def pose_matrix_to_quaternion(pose):
101
+ """ convert 4x4 pose matrix to (t, q) """
102
+ q = Rotation.from_matrix(pose[:3, :3]).as_quat()
103
+ return np.concatenate([pose[:3, 3], q], axis=0)
104
+
105
+ def compute_distance_matrix_flow(poses, disps, intrinsics):
106
+ """ compute flow magnitude between all pairs of frames """
107
+ if not isinstance(poses, SE3):
108
+ poses = torch.from_numpy(poses).float().cuda()[None]
109
+ poses = SE3(poses).inv()
110
+
111
+ disps = torch.from_numpy(disps).float().cuda()[None]
112
+ intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]
113
+
114
+ N = poses.shape[1]
115
+
116
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
117
+ ii = ii.reshape(-1).cuda()
118
+ jj = jj.reshape(-1).cuda()
119
+
120
+ MAX_FLOW = 100.0
121
+ matrix = np.zeros((N, N), dtype=np.float32)
122
+
123
+ s = 2048
124
+ for i in range(0, ii.shape[0], s):
125
+ flow1, val1 = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
126
+ flow2, val2 = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s])
127
+
128
+ flow = torch.stack([flow1, flow2], dim=2)
129
+ val = torch.stack([val1, val2], dim=2)
130
+
131
+ mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
132
+ mag = mag.view(mag.shape[1], -1)
133
+ val = val.view(val.shape[1], -1)
134
+
135
+ mag = (mag * val).mean(-1) / val.mean(-1)
136
+ mag[val.mean(-1) < 0.7] = np.inf
137
+
138
+ i1 = ii[i:i+s].cpu().numpy()
139
+ j1 = jj[i:i+s].cpu().numpy()
140
+ matrix[i1, j1] = mag.cpu().numpy()
141
+
142
+ return matrix
143
+
144
+
145
+ def compute_distance_matrix_flow2(poses, disps, intrinsics, beta=0.4):
146
+ """ compute flow magnitude between all pairs of frames """
147
+ # if not isinstance(poses, SE3):
148
+ # poses = torch.from_numpy(poses).float().cuda()[None]
149
+ # poses = SE3(poses).inv()
150
+
151
+ # disps = torch.from_numpy(disps).float().cuda()[None]
152
+ # intrinsics = torch.from_numpy(intrinsics).float().cuda()[None]
153
+
154
+ N = poses.shape[1]
155
+
156
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
157
+ ii = ii.reshape(-1)
158
+ jj = jj.reshape(-1)
159
+
160
+ MAX_FLOW = 128.0
161
+ matrix = np.zeros((N, N), dtype=np.float32)
162
+
163
+ s = 2048
164
+ for i in range(0, ii.shape[0], s):
165
+ flow1a, val1a = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s], tonly=True)
166
+ flow1b, val1b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
167
+ flow2a, val2a = pops.induced_flow(poses, disps, intrinsics, jj[i:i+s], ii[i:i+s], tonly=True)
168
+ flow2b, val2b = pops.induced_flow(poses, disps, intrinsics, ii[i:i+s], jj[i:i+s])
169
+
170
+ flow1 = flow1a + beta * flow1b
171
+ val1 = val1a * val2b
172
+
173
+ flow2 = flow2a + beta * flow2b
174
+ val2 = val2a * val2b
175
+
176
+ flow = torch.stack([flow1, flow2], dim=2)
177
+ val = torch.stack([val1, val2], dim=2)
178
+
179
+ mag = flow.norm(dim=-1).clamp(max=MAX_FLOW)
180
+ mag = mag.view(mag.shape[1], -1)
181
+ val = val.view(val.shape[1], -1)
182
+
183
+ mag = (mag * val).mean(-1) / val.mean(-1)
184
+ mag[val.mean(-1) < 0.8] = np.inf
185
+
186
+ i1 = ii[i:i+s].cpu().numpy()
187
+ j1 = jj[i:i+s].cpu().numpy()
188
+ matrix[i1, j1] = mag.cpu().numpy()
189
+
190
+ return matrix
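A toy example (not one of the committed files) of the timestamp association performed by `associate_frames` above, which pairs each image with the nearest depth frame and pose within `max_dt`. The timestamps are invented and the expected output is worked out by hand from the code.

```python
# Illustrative sketch (not one of the committed files): nearest-timestamp association
# as done by associate_frames() above, with three images and a faster pose stream.
import numpy as np

tstamp_image = np.array([0.00, 0.10, 0.20])
tstamp_depth = np.array([0.01, 0.11, 0.21])
tstamp_pose  = np.array([0.00, 0.05, 0.10, 0.15, 0.20])

# associate_frames(tstamp_image, tstamp_depth, tstamp_pose, max_dt=0.08)
# -> [(0, 0, 0), (1, 1, 2), (2, 2, 4)]   (image idx, depth idx, pose idx)
```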
thirdparty/DROID-SLAM/droid_slam/data_readers/stream.py ADDED
@@ -0,0 +1,234 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import torch.utils.data as data
5
+ import torch.nn.functional as F
6
+
7
+ import csv
8
+ import os
9
+ import cv2
10
+ import math
11
+ import random
12
+ import json
13
+ import pickle
14
+ import os.path as osp
15
+
16
+ from .rgbd_utils import *
17
+
18
+ class RGBDStream(data.Dataset):
19
+ def __init__(self, datapath, frame_rate=-1, image_size=[384,512], crop_size=[0,0]):
20
+ self.datapath = datapath
21
+ self.frame_rate = frame_rate
22
+ self.image_size = image_size
23
+ self.crop_size = crop_size
24
+ self._build_dataset_index()
25
+
26
+ @staticmethod
27
+ def image_read(image_file):
28
+ return cv2.imread(image_file)
29
+
30
+ @staticmethod
31
+ def depth_read(depth_file):
32
+ return np.load(depth_file)
33
+
34
+ def __len__(self):
35
+ return len(self.images)
36
+
37
+ def __getitem__(self, index):
38
+ """ return training video """
39
+ image = self.__class__.image_read(self.images[index])
40
+ image = torch.from_numpy(image).float()
41
+ image = image.permute(2, 0, 1)
42
+
43
+ try:
44
+ tstamp = self.tstamps[index]
45
+ except:
46
+ tstamp = index
47
+
48
+ pose = torch.from_numpy(self.poses[index]).float()
49
+ intrinsic = torch.from_numpy(self.intrinsics[index]).float()
50
+
51
+ # resize image
52
+ sx = self.image_size[1] / image.shape[2]
53
+ sy = self.image_size[0] / image.shape[1]
54
+
55
+ image = F.interpolate(image[None], self.image_size, mode='bilinear', align_corners=False)[0]
56
+
57
+ fx, fy, cx, cy = intrinsic.unbind(dim=0)
58
+ fx, cx = sx * fx, sx * cx
59
+ fy, cy = sy * fy, sy * cy
60
+
61
+ # crop image
62
+ if self.crop_size[0] > 0:
63
+ cy = cy - self.crop_size[0]
64
+ image = image[:,self.crop_size[0]:-self.crop_size[0],:]
65
+
66
+ if self.crop_size[1] > 0:
67
+ cx = cx - self.crop_size[1]
68
+ image = image[:,:,self.crop_size[1]:-self.crop_size[1]]
69
+
70
+ intrinsic = torch.stack([fx, fy, cx, cy])
71
+
72
+ return tstamp, image, pose, intrinsic
73
+
74
+
75
+ class ImageStream(data.Dataset):
76
+ def __init__(self, datapath, intrinsics, rate=1, image_size=[384,512]):
77
+ rgb_list = osp.join(datapath, 'rgb.txt')
78
+ if os.path.isfile(rgb_list):
79
+ rgb_list = np.loadtxt(rgb_list, delimiter=' ', dtype=np.unicode_)
80
+ self.timestamps = rgb_list[:,0].astype(np.float)
81
+ self.images = [os.path.join(datapath, x) for x in rgb_list[:,1]]
82
+ self.images = self.images[::rate]
83
+ self.timestamps = self.timestamps[::rate]
84
+
85
+ else:
86
+ import glob
87
+ self.images = sorted(glob.glob(osp.join(datapath, '*.jpg'))) + sorted(glob.glob(osp.join(datapath, '*.png')))
88
+ self.images = self.images[::rate]
89
+
90
+ self.intrinsics = intrinsics
91
+ self.image_size = image_size
92
+
93
+ def __len__(self):
94
+ return len(self.images)
95
+
96
+ @staticmethod
97
+ def image_read(imfile):
98
+ return cv2.imread(imfile)
99
+
100
+ def __getitem__(self, index):
101
+ """ return training video """
102
+ image = self.__class__.image_read(self.images[index])
103
+
104
+ try:
105
+ tstamp = self.timestamps[index]
106
+ except:
107
+ tstamp = index
108
+
109
+ ht0, wd0 = image.shape[:2]
110
+ ht1, wd1 = self.image_size
111
+
112
+ intrinsics = torch.as_tensor(self.intrinsics)
113
+ intrinsics[0] *= wd1 / wd0
114
+ intrinsics[1] *= ht1 / ht0
115
+ intrinsics[2] *= wd1 / wd0
116
+ intrinsics[3] *= ht1 / ht0
117
+
118
+ # resize image
119
+ ikwargs = {'mode': 'bilinear', 'align_corners': True}
120
+ image = torch.from_numpy(image).float().permute(2, 0, 1)
121
+ image = F.interpolate(image[None], self.image_size, **ikwargs)[0]
122
+
123
+ return tstamp, image, intrinsics
124
+
125
+
126
+
127
+ class StereoStream(data.Dataset):
128
+ def __init__(self, datapath, intrinsics, rate=1, image_size=[384,512],
129
+ map_left=None, map_right=None, left_root='image_left', right_root='image_right'):
130
+ import glob
131
+ self.intrinsics = intrinsics
132
+ self.image_size = image_size
133
+
134
+ imgs = sorted(glob.glob(osp.join(datapath, left_root, '*.png')))[::rate]
135
+ self.images_l = []
136
+ self.images_r = []
137
+ self.tstamps = []
138
+
139
+ for img_l in imgs:
140
+ img_r = img_l.replace(left_root, right_root)
141
+ if os.path.isfile(img_r):
142
+ t = np.float(img_l.split('/')[-1].replace('.png', ''))
143
+ self.tstamps.append(t)
144
+ self.images_l += [ img_l ]
145
+ self.images_r += [ img_r ]
146
+
147
+ self.map_left = map_left
148
+ self.map_right = map_right
149
+
150
+ def __len__(self):
151
+ return len(self.images_l)
152
+
153
+ @staticmethod
154
+ def image_read(imfile, imap=None):
155
+ image = cv2.imread(imfile)
156
+ if imap is not None:
157
+ image = cv2.remap(image, imap[0], imap[1], interpolation=cv2.INTER_LINEAR)
158
+ return image
159
+
160
+ def __getitem__(self, index):
161
+ """ return training video """
162
+ tstamp = self.tstamps[index]
163
+ image_l = self.__class__.image_read(self.images_l[index], self.map_left)
164
+ image_r = self.__class__.image_read(self.images_r[index], self.map_right)
165
+
166
+ ht0, wd0 = image_l.shape[:2]
167
+ ht1, wd1 = self.image_size
168
+
169
+ intrinsics = torch.as_tensor(self.intrinsics)
170
+ intrinsics[0] *= wd1 / wd0
171
+ intrinsics[1] *= ht1 / ht0
172
+ intrinsics[2] *= wd1 / wd0
173
+ intrinsics[3] *= ht1 / ht0
174
+
175
+ image_l = torch.from_numpy(image_l).float().permute(2, 0, 1)
176
+ image_r = torch.from_numpy(image_r).float().permute(2, 0, 1)
177
+
178
+ # resize image
179
+ ikwargs = {'mode': 'bilinear', 'align_corners': True}
180
+ image_l = F.interpolate(image_l[None], self.image_size, **ikwargs)[0]
181
+ image_r = F.interpolate(image_r[None], self.image_size, **ikwargs)[0]
182
+
183
+ return tstamp, image_l, image_r, intrinsics
184
+
185
+
186
+
187
+ # class RGBDStream(data.Dataset):
188
+ # def __init__(self, datapath, intrinsics=None, rate=1, image_size=[384,512]):
189
+ # assoc_file = osp.join(datapath, 'associated.txt')
190
+ # assoc_list = np.loadtxt(assoc_file, delimiter=' ', dtype=np.unicode_)
191
+
192
+ # self.intrinsics = intrinsics
193
+ # self.image_size = image_size
194
+
195
+ # self.timestamps = assoc_list[:,0].astype(np.float)[::rate]
196
+ # self.images = [os.path.join(datapath, x) for x in assoc_list[:,1]][::rate]
197
+ # self.depths = [os.path.join(datapath, x) for x in assoc_list[:,3]][::rate]
198
+
199
+ # def __len__(self):
200
+ # return len(self.images)
201
+
202
+ # @staticmethod
203
+ # def image_read(imfile):
204
+ # return cv2.imread(imfile)
205
+
206
+ # @staticmethod
207
+ # def depth_read(depth_file):
208
+ # depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH)
209
+ # return depth.astype(np.float32) / 5000.0
210
+
211
+ # def __getitem__(self, index):
212
+ # """ return training video """
213
+ # tstamp = self.timestamps[index]
214
+ # image = self.__class__.image_read(self.images[index])
215
+ # depth = self.__class__.depth_read(self.depths[index])
216
+
217
+ # ht0, wd0 = image.shape[:2]
218
+ # ht1, wd1 = self.image_size
219
+
220
+ # intrinsics = torch.as_tensor(self.intrinsics)
221
+ # intrinsics[0] *= wd1 / wd0
222
+ # intrinsics[1] *= ht1 / ht0
223
+ # intrinsics[2] *= wd1 / wd0
224
+ # intrinsics[3] *= ht1 / ht0
225
+
226
+ # # resize image
227
+ # ikwargs = {'mode': 'bilinear', 'align_corners': True}
228
+ # image = torch.from_numpy(image).float().permute(2, 0, 1)
229
+ # image = F.interpolate(image[None], self.image_size, **ikwargs)[0]
230
+
231
+ # depth = torch.from_numpy(depth).float()[None,None]
232
+ # depth = F.interpolate(depth, self.image_size, mode='nearest').squeeze()
233
+
234
+ # return tstamp, image, depth, intrinsics
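The stream classes above all rescale pinhole intrinsics when an image is resized: `fx`/`cx` scale with the width ratio and `fy`/`cy` with the height ratio. A short worked example (not one of the committed files), using the TartanAir intrinsics that appear later in `tartan.py`:

```python
# Illustrative sketch (not one of the committed files): intrinsics rescaling as in
# ImageStream/StereoStream.__getitem__, resizing 640x480 input to the 384x512 default.
import torch

ht0, wd0 = 480, 640                                       # original image size
ht1, wd1 = 384, 512                                       # stream image_size default
intrinsics = torch.tensor([320.0, 320.0, 320.0, 240.0])   # fx fy cx cy (TartanAir)

intrinsics[0] *= wd1 / wd0   # fx scales with width
intrinsics[1] *= ht1 / ht0   # fy scales with height
intrinsics[2] *= wd1 / wd0   # cx scales with width
intrinsics[3] *= ht1 / ht0   # cy scales with height
print(intrinsics)            # tensor([256., 256., 256., 192.])
```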
thirdparty/DROID-SLAM/droid_slam/data_readers/tartan.py ADDED
@@ -0,0 +1,138 @@
1
+
2
+ import numpy as np
3
+ import torch
4
+ import glob
5
+ import cv2
6
+ import os
7
+ import os.path as osp
8
+
9
+ from lietorch import SE3
10
+ from .base import RGBDDataset
11
+ from .stream import RGBDStream
12
+
13
+ cur_path = osp.dirname(osp.abspath(__file__))
14
+ test_split = osp.join(cur_path, 'tartan_test.txt')
15
+ test_split = open(test_split).read().split()
16
+
17
+
18
+ class TartanAir(RGBDDataset):
19
+
20
+ # scale depths to balance rot & trans
21
+ DEPTH_SCALE = 5.0
22
+
23
+ def __init__(self, mode='training', **kwargs):
24
+ self.mode = mode
25
+ self.n_frames = 2
26
+ super(TartanAir, self).__init__(name='TartanAir', **kwargs)
27
+
28
+ @staticmethod
29
+ def is_test_scene(scene):
30
+ # print(scene, any(x in scene for x in test_split))
31
+ return any(x in scene for x in test_split)
32
+
33
+ def _build_dataset(self):
34
+ from tqdm import tqdm
35
+ print("Building TartanAir dataset")
36
+
37
+ scene_info = {}
38
+ scenes = glob.glob(osp.join(self.root, '*/*/*/*'))
39
+ for scene in tqdm(sorted(scenes)):
40
+ images = sorted(glob.glob(osp.join(scene, 'image_left/*.png')))
41
+ depths = sorted(glob.glob(osp.join(scene, 'depth_left/*.npy')))
42
+
43
+ poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ')
44
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
45
+ poses[:,:3] /= TartanAir.DEPTH_SCALE
46
+ intrinsics = [TartanAir.calib_read()] * len(images)
47
+
48
+ # graph of co-visible frames based on flow
49
+ graph = self.build_frame_graph(poses, depths, intrinsics)
50
+
51
+ scene = '/'.join(scene.split('/'))
52
+ scene_info[scene] = {'images': images, 'depths': depths,
53
+ 'poses': poses, 'intrinsics': intrinsics, 'graph': graph}
54
+
55
+ return scene_info
56
+
57
+ @staticmethod
58
+ def calib_read():
59
+ return np.array([320.0, 320.0, 320.0, 240.0])
60
+
61
+ @staticmethod
62
+ def image_read(image_file):
63
+ return cv2.imread(image_file)
64
+
65
+ @staticmethod
66
+ def depth_read(depth_file):
67
+ depth = np.load(depth_file) / TartanAir.DEPTH_SCALE
68
+ depth[np.isnan(depth)] = 1.0
69
+ depth[depth==np.inf] = 1.0
70
+ return depth
71
+
72
+
73
+ class TartanAirStream(RGBDStream):
74
+ def __init__(self, datapath, **kwargs):
75
+ super(TartanAirStream, self).__init__(datapath=datapath, **kwargs)
76
+
77
+ def _build_dataset_index(self):
78
+ """ build list of images, poses, depths, and intrinsics """
79
+ self.root = 'datasets/TartanAir'
80
+
81
+ scene = osp.join(self.root, self.datapath)
82
+ image_glob = osp.join(scene, 'image_left/*.png')
83
+ images = sorted(glob.glob(image_glob))
84
+
85
+ poses = np.loadtxt(osp.join(scene, 'pose_left.txt'), delimiter=' ')
86
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
87
+
88
+ poses = SE3(torch.as_tensor(poses))
89
+ poses = poses[[0]].inv() * poses
90
+ poses = poses.data.cpu().numpy()
91
+
92
+ intrinsic = self.calib_read(self.datapath)
93
+ intrinsics = np.tile(intrinsic[None], (len(images), 1))
94
+
95
+ self.images = images[::int(self.frame_rate)]
96
+ self.poses = poses[::int(self.frame_rate)]
97
+ self.intrinsics = intrinsics[::int(self.frame_rate)]
98
+
99
+ @staticmethod
100
+ def calib_read(datapath):
101
+ return np.array([320.0, 320.0, 320.0, 240.0])
102
+
103
+ @staticmethod
104
+ def image_read(image_file):
105
+ return cv2.imread(image_file)
106
+
107
+
108
+ class TartanAirTestStream(RGBDStream):
109
+ def __init__(self, datapath, **kwargs):
110
+ super(TartanAirTestStream, self).__init__(datapath=datapath, **kwargs)
111
+
112
+ def _build_dataset_index(self):
113
+ """ build list of images, poses, depths, and intrinsics """
114
+ self.root = 'datasets/mono'
115
+ image_glob = osp.join(self.root, self.datapath, '*.png')
116
+ images = sorted(glob.glob(image_glob))
117
+
118
+ poses = np.loadtxt(osp.join(self.root, 'mono_gt', self.datapath + '.txt'), delimiter=' ')
119
+ poses = poses[:, [1, 2, 0, 4, 5, 3, 6]]
120
+
121
+ poses = SE3(torch.as_tensor(poses))
122
+ poses = poses[[0]].inv() * poses
123
+ poses = poses.data.cpu().numpy()
124
+
125
+ intrinsic = self.calib_read(self.datapath)
126
+ intrinsics = np.tile(intrinsic[None], (len(images), 1))
127
+
128
+ self.images = images[::int(self.frame_rate)]
129
+ self.poses = poses[::int(self.frame_rate)]
130
+ self.intrinsics = intrinsics[::int(self.frame_rate)]
131
+
132
+ @staticmethod
133
+ def calib_read(datapath):
134
+ return np.array([320.0, 320.0, 320.0, 240.0])
135
+
136
+ @staticmethod
137
+ def image_read(image_file):
138
+ return cv2.imread(image_file)
thirdparty/DROID-SLAM/droid_slam/data_readers/tartan_test.txt ADDED
@@ -0,0 +1,32 @@
1
+ abandonedfactory/abandonedfactory/Easy/P011
2
+ abandonedfactory/abandonedfactory/Hard/P011
3
+ abandonedfactory_night/abandonedfactory_night/Easy/P013
4
+ abandonedfactory_night/abandonedfactory_night/Hard/P014
5
+ amusement/amusement/Easy/P008
6
+ amusement/amusement/Hard/P007
7
+ carwelding/carwelding/Easy/P007
8
+ endofworld/endofworld/Easy/P009
9
+ gascola/gascola/Easy/P008
10
+ gascola/gascola/Hard/P009
11
+ hospital/hospital/Easy/P036
12
+ hospital/hospital/Hard/P049
13
+ japanesealley/japanesealley/Easy/P007
14
+ japanesealley/japanesealley/Hard/P005
15
+ neighborhood/neighborhood/Easy/P021
16
+ neighborhood/neighborhood/Hard/P017
17
+ ocean/ocean/Easy/P013
18
+ ocean/ocean/Hard/P009
19
+ office2/office2/Easy/P011
20
+ office2/office2/Hard/P010
21
+ office/office/Hard/P007
22
+ oldtown/oldtown/Easy/P007
23
+ oldtown/oldtown/Hard/P008
24
+ seasidetown/seasidetown/Easy/P009
25
+ seasonsforest/seasonsforest/Easy/P011
26
+ seasonsforest/seasonsforest/Hard/P006
27
+ seasonsforest_winter/seasonsforest_winter/Easy/P009
28
+ seasonsforest_winter/seasonsforest_winter/Hard/P018
29
+ soulcity/soulcity/Easy/P012
30
+ soulcity/soulcity/Hard/P009
31
+ westerndesert/westerndesert/Easy/P013
32
+ westerndesert/westerndesert/Hard/P007
thirdparty/DROID-SLAM/droid_slam/depth_video.py ADDED
@@ -0,0 +1,197 @@
1
+ import numpy as np
2
+ import torch
3
+ import lietorch
4
+ import droid_backends
5
+
6
+ from torch.multiprocessing import Process, Queue, Lock, Value
7
+ from collections import OrderedDict
8
+
9
+ from droid_net import cvx_upsample
10
+ import geom.projective_ops as pops
11
+
12
+ class DepthVideo:
13
+ def __init__(self, image_size=[480, 640], buffer=1024, stereo=False, device="cuda:0"):
14
+
15
+ # current keyframe count
16
+ self.counter = Value('i', 0)
17
+ self.ready = Value('i', 0)
18
+ self.ht = ht = image_size[0]
19
+ self.wd = wd = image_size[1]
20
+
21
+ ### state attributes ###
22
+ self.tstamp = torch.zeros(buffer, device="cuda", dtype=torch.float).share_memory_()
23
+ self.images = torch.zeros(buffer, 3, ht, wd, device="cuda", dtype=torch.uint8)
24
+ self.dirty = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
25
+ self.red = torch.zeros(buffer, device="cuda", dtype=torch.bool).share_memory_()
26
+ self.poses = torch.zeros(buffer, 7, device="cuda", dtype=torch.float).share_memory_()
27
+ self.disps = torch.ones(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
28
+ self.disps_sens = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
29
+ self.disps_up = torch.zeros(buffer, ht, wd, device="cuda", dtype=torch.float).share_memory_()
30
+ self.intrinsics = torch.zeros(buffer, 4, device="cuda", dtype=torch.float).share_memory_()
31
+
32
+ self.masks = torch.zeros(buffer, ht//8, wd//8, device="cuda", dtype=torch.float).share_memory_()
33
+ self.stereo = stereo
34
+ c = 1 if not self.stereo else 2
35
+
36
+ ### feature attributes ###
37
+ self.fmaps = torch.zeros(buffer, c, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
38
+ self.nets = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
39
+ self.inps = torch.zeros(buffer, 128, ht//8, wd//8, dtype=torch.half, device="cuda").share_memory_()
40
+
41
+ # initialize poses to identity transformation
42
+ self.poses[:] = torch.as_tensor([0, 0, 0, 0, 0, 0, 1], dtype=torch.float, device="cuda")
43
+
44
+ def get_lock(self):
45
+ return self.counter.get_lock()
46
+
47
+ def __item_setter(self, index, item):
48
+ if isinstance(index, int) and index >= self.counter.value:
49
+ self.counter.value = index + 1
50
+
51
+ elif isinstance(index, torch.Tensor) and index.max().item() > self.counter.value:
52
+ self.counter.value = index.max().item() + 1
53
+
54
+ # self.dirty[index] = True
55
+ self.tstamp[index] = item[0]
56
+ self.images[index] = item[1]
57
+
58
+ if item[2] is not None:
59
+ self.poses[index] = item[2]
60
+
61
+ if item[3] is not None:
62
+ self.disps[index] = item[3]
63
+
64
+ if item[4] is not None:
65
+ depth = item[4][3::8,3::8]
66
+ self.disps_sens[index] = torch.where(depth>0, 1.0/depth, depth)
67
+
68
+ if item[5] is not None:
69
+ self.intrinsics[index] = item[5]
70
+
71
+ if len(item) > 6:
72
+ self.fmaps[index] = item[6]
73
+
74
+ if len(item) > 7:
75
+ self.nets[index] = item[7]
76
+
77
+ if len(item) > 8:
78
+ self.inps[index] = item[8]
79
+
80
+ if len(item) > 9:
81
+ self.masks[index] = item[9]
82
+
83
+ def __setitem__(self, index, item):
84
+ with self.get_lock():
85
+ self.__item_setter(index, item)
86
+
87
+ def __getitem__(self, index):
88
+ """ index the depth video """
89
+
90
+ with self.get_lock():
91
+ # support negative indexing
92
+ if isinstance(index, int) and index < 0:
93
+ index = self.counter.value + index
94
+
95
+ item = (
96
+ self.poses[index],
97
+ self.disps[index],
98
+ self.intrinsics[index],
99
+ self.fmaps[index],
100
+ self.nets[index],
101
+ self.inps[index])
102
+
103
+ return item
104
+
105
+ def append(self, *item):
106
+ with self.get_lock():
107
+ self.__item_setter(self.counter.value, item)
108
+
109
+
110
+ ### geometric operations ###
111
+
112
+ @staticmethod
113
+ def format_indicies(ii, jj):
114
+ """ to device, long, {-1} """
115
+
116
+ if not isinstance(ii, torch.Tensor):
117
+ ii = torch.as_tensor(ii)
118
+
119
+ if not isinstance(jj, torch.Tensor):
120
+ jj = torch.as_tensor(jj)
121
+
122
+ ii = ii.to(device="cuda", dtype=torch.long).reshape(-1)
123
+ jj = jj.to(device="cuda", dtype=torch.long).reshape(-1)
124
+
125
+ return ii, jj
126
+
127
+ def upsample(self, ix, mask):
128
+ """ upsample disparity """
129
+
130
+ disps_up = cvx_upsample(self.disps[ix].unsqueeze(-1), mask)
131
+ self.disps_up[ix] = disps_up.squeeze()
132
+
133
+ def normalize(self):
134
+ """ normalize depth and poses """
135
+
136
+ with self.get_lock():
137
+ s = self.disps[:self.counter.value].mean()
138
+ self.disps[:self.counter.value] /= s
139
+ self.poses[:self.counter.value,:3] *= s
140
+ self.dirty[:self.counter.value] = True
141
+
142
+
143
+ def reproject(self, ii, jj):
144
+ """ project points from ii -> jj """
145
+ ii, jj = DepthVideo.format_indicies(ii, jj)
146
+ Gs = lietorch.SE3(self.poses[None])
147
+
148
+ coords, valid_mask = \
149
+ pops.projective_transform(Gs, self.disps[None], self.intrinsics[None], ii, jj)
150
+
151
+ return coords, valid_mask
152
+
153
+ def distance(self, ii=None, jj=None, beta=0.3, bidirectional=True):
154
+ """ frame distance metric """
155
+
156
+ return_matrix = False
157
+ if ii is None:
158
+ return_matrix = True
159
+ N = self.counter.value
160
+ ii, jj = torch.meshgrid(torch.arange(N), torch.arange(N), indexing='ij')
161
+
162
+ ii, jj = DepthVideo.format_indicies(ii, jj)
163
+
164
+ if bidirectional:
165
+
166
+ poses = self.poses[:self.counter.value].clone()
167
+
168
+ d1 = droid_backends.frame_distance(
169
+ poses, self.disps, self.intrinsics[0], ii, jj, beta)
170
+
171
+ d2 = droid_backends.frame_distance(
172
+ poses, self.disps, self.intrinsics[0], jj, ii, beta)
173
+
174
+ d = .5 * (d1 + d2)
175
+
176
+ else:
177
+ d = droid_backends.frame_distance(
178
+ self.poses, self.disps, self.intrinsics[0], ii, jj, beta)
179
+
180
+ if return_matrix:
181
+ return d.reshape(N, N)
182
+
183
+ return d
184
+
185
+ def ba(self, target, weight, eta, ii, jj, t0=1, t1=None, itrs=2, lm=1e-4, ep=0.1, motion_only=False):
186
+ """ dense bundle adjustment (DBA) """
187
+
188
+ with self.get_lock():
189
+
190
+ # [t0, t1] window of bundle adjustment optimization
191
+ if t1 is None:
192
+ t1 = max(ii.max().item(), jj.max().item()) + 1
193
+
194
+ droid_backends.ba(self.poses, self.disps, self.intrinsics[0], self.disps_sens,
195
+ target, weight, eta, ii, jj, t0, t1, itrs, lm, ep, motion_only)
196
+
197
+ self.disps.clamp_(min=0.001)
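For orientation, a minimal sketch of how a frame could be pushed into this buffer and read back. The buffer size, tensor shapes, and intrinsics below are illustrative assumptions (and the droid_backends CUDA extension must be built for the import to succeed); the positional argument order simply follows __item_setter above.

    import torch
    from depth_video import DepthVideo

    video = DepthVideo(image_size=[480, 640], buffer=512)   # illustrative buffer size

    image = torch.zeros(3, 480, 640, dtype=torch.uint8, device="cuda")
    intrinsics = torch.as_tensor([320.0, 320.0, 320.0, 240.0], device="cuda")

    # positional layout: (tstamp, image, pose, disp, depth, intrinsics, ...)
    video.append(0.0, image, None, None, None, intrinsics)

    # __getitem__ returns (pose, disp, intrinsics, fmap, net, inp) for a keyframe index
    pose, disp, intr, fmap, net, inp = video[0]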
thirdparty/DROID-SLAM/droid_slam/droid.py ADDED
@@ -0,0 +1,102 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ from droid_net import DroidNet
6
+ from depth_video import DepthVideo
7
+ from motion_filter import MotionFilter
8
+ from droid_frontend import DroidFrontend
9
+ from droid_backend import DroidBackend
10
+ from trajectory_filler import PoseTrajectoryFiller
11
+
12
+ from collections import OrderedDict
13
+ from torch.multiprocessing import Process
14
+
15
+
16
+ class Droid:
17
+ def __init__(self, args):
18
+ super(Droid, self).__init__()
19
+ self.load_weights(args.weights)
20
+ self.args = args
21
+ self.disable_vis = args.disable_vis
22
+
23
+ # store images, depth, poses, intrinsics (shared between processes)
24
+ self.video = DepthVideo(args.image_size, args.buffer, stereo=args.stereo)
25
+
26
+ # filter incoming frames so that there is enough motion
27
+ self.filterx = MotionFilter(self.net, self.video, thresh=args.filter_thresh)
28
+
29
+ # frontend process
30
+ self.frontend = DroidFrontend(self.net, self.video, self.args)
31
+
32
+ # backend process
33
+ self.backend = DroidBackend(self.net, self.video, self.args)
34
+
35
+ # visualizer
36
+ if not self.disable_vis:
37
+ # from visualization import droid_visualization
38
+ from vis_headless import droid_visualization
39
+ print('Using headless ...')
40
+ self.visualizer = Process(target=droid_visualization, args=(self.video, '.'))
41
+ self.visualizer.start()
42
+
43
+ # post processor - fill in poses for non-keyframes
44
+ self.traj_filler = PoseTrajectoryFiller(self.net, self.video)
45
+
46
+
47
+ def load_weights(self, weights):
48
+ """ load trained model weights """
49
+
50
+ self.net = DroidNet()
51
+ state_dict = OrderedDict([
52
+ (k.replace("module.", ""), v) for (k, v) in torch.load(weights).items()])
53
+
54
+ state_dict["update.weight.2.weight"] = state_dict["update.weight.2.weight"][:2]
55
+ state_dict["update.weight.2.bias"] = state_dict["update.weight.2.bias"][:2]
56
+ state_dict["update.delta.2.weight"] = state_dict["update.delta.2.weight"][:2]
57
+ state_dict["update.delta.2.bias"] = state_dict["update.delta.2.bias"][:2]
58
+
59
+ self.net.load_state_dict(state_dict)
60
+ self.net.to("cuda:0").eval()
61
+
62
+ def track(self, tstamp, image, depth=None, intrinsics=None, mask=None):
63
+ """ main thread - update map """
64
+
65
+ with torch.no_grad():
66
+ # check there is enough motion
67
+ self.filterx.track(tstamp, image, depth, intrinsics, mask)
68
+
69
+ # local bundle adjustment
70
+ self.frontend()
71
+
72
+ # global bundle adjustment
73
+ # self.backend()
74
+
75
+ def terminate(self, stream=None, backend=True):
76
+ """ terminate the visualization process, return poses [t, q] """
77
+
78
+ del self.frontend
79
+
80
+ if backend:
81
+ torch.cuda.empty_cache()
82
+ # print("#" * 32)
83
+ self.backend(7)
84
+
85
+ torch.cuda.empty_cache()
86
+ # print("#" * 32)
87
+ self.backend(12)
88
+
89
+ camera_trajectory = self.traj_filler(stream)
90
+ return camera_trajectory.inv().data.cpu().numpy()
91
+
92
+ def compute_error(self):
93
+ """ compute slam reprojection error """
94
+
95
+ del self.frontend
96
+
97
+ torch.cuda.empty_cache()
98
+ self.backend(12)
99
+
100
+ return self.backend.errors[-1]
101
+
102
+
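A rough sketch of the intended calling pattern, for readers skimming the diff. The image_stream generator and the args namespace are placeholders invented here (the repository's demo and evaluation scripts build the real ones); only the Droid methods used below are taken from the class above.

    import torch
    from droid import Droid

    def image_stream():
        # placeholder: a real stream yields (tstamp, image[1,3,H,W] uint8, intrinsics[4])
        intrinsics = torch.as_tensor([320.0, 320.0, 320.0, 240.0])
        for t in range(200):
            yield t, torch.zeros(1, 3, 480, 640, dtype=torch.uint8), intrinsics

    droid = Droid(args)                        # args as parsed by the calling script
    for t, image, intrinsics in image_stream():
        droid.track(t, image, intrinsics=intrinsics)

    traj = droid.terminate(image_stream())     # [t, q] pose for every input frame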
thirdparty/DROID-SLAM/droid_slam/droid_backend.py ADDED
@@ -0,0 +1,52 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ from lietorch import SE3
6
+ from factor_graph import FactorGraph
7
+
8
+
9
+ class DroidBackend:
10
+ def __init__(self, net, video, args):
11
+ self.video = video
12
+ self.update_op = net.update
13
+
14
+ # global optimization window
15
+ self.t0 = 0
16
+ self.t1 = 0
17
+
18
+ self.upsample = args.upsample
19
+ self.beta = args.beta
20
+ self.backend_thresh = args.backend_thresh
21
+ self.backend_radius = args.backend_radius
22
+ self.backend_nms = args.backend_nms
23
+ self.errors = []
24
+
25
+ @torch.no_grad()
26
+ def __call__(self, steps=12):
27
+ """ main update """
28
+
29
+ t = self.video.counter.value
30
+ if not self.video.stereo and not torch.any(self.video.disps_sens):
31
+ self.video.normalize()
32
+
33
+ graph = FactorGraph(self.video, self.update_op, corr_impl="alt", max_factors=16*t, upsample=self.upsample)
34
+
35
+ graph.add_proximity_factors(rad=self.backend_radius,
36
+ nms=self.backend_nms,
37
+ thresh=self.backend_thresh,
38
+ beta=self.beta)
39
+
40
+ graph.update_lowmem(steps=steps)
41
+ self.errors.append(self.cal_err(graph))
42
+ graph.clear_edges()
43
+ self.video.dirty[:t] = True
44
+
45
+ return
46
+
47
+ def cal_err(self, graph):
48
+ coord, _ = graph.video.reproject(graph.ii, graph.jj)
49
+ diff = graph.target - coord
50
+ err = diff.norm(dim=-1).mean().item()
51
+ return err
52
+
thirdparty/DROID-SLAM/droid_slam/droid_frontend.py ADDED
@@ -0,0 +1,119 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ from lietorch import SE3
6
+ from factor_graph import FactorGraph
7
+
8
+
9
+ class DroidFrontend:
10
+ def __init__(self, net, video, args):
11
+ self.video = video
12
+ self.update_op = net.update
13
+ self.graph = FactorGraph(video, net.update, max_factors=48, upsample=args.upsample)
14
+
15
+ # local optimization window
16
+ self.t0 = 0
17
+ self.t1 = 0
18
+
19
+ # frontend variables
20
+ self.is_initialized = False
21
+ self.count = 0
22
+
23
+ self.max_age = 25
24
+ self.iters1 = 4
25
+ self.iters2 = 2
26
+
27
+ self.warmup = args.warmup
28
+ self.beta = args.beta
29
+ self.frontend_nms = args.frontend_nms
30
+ self.keyframe_thresh = args.keyframe_thresh
31
+ self.frontend_window = args.frontend_window
32
+ self.frontend_thresh = args.frontend_thresh
33
+ self.frontend_radius = args.frontend_radius
34
+
35
+ def __update(self):
36
+ """ add edges, perform update """
37
+
38
+ self.count += 1
39
+ self.t1 += 1
40
+
41
+ if self.graph.corr is not None:
42
+ self.graph.rm_factors(self.graph.age > self.max_age, store=True)
43
+
44
+ self.graph.add_proximity_factors(self.t1-5, max(self.t1-self.frontend_window, 0),
45
+ rad=self.frontend_radius, nms=self.frontend_nms, thresh=self.frontend_thresh, beta=self.beta, remove=True)
46
+
47
+ self.video.disps[self.t1-1] = torch.where(self.video.disps_sens[self.t1-1] > 0,
48
+ self.video.disps_sens[self.t1-1], self.video.disps[self.t1-1])
49
+
50
+ for itr in range(self.iters1):
51
+ self.graph.update(None, None, use_inactive=True)
52
+
53
+ # set initial pose for next frame
54
+ poses = SE3(self.video.poses)
55
+ d = self.video.distance([self.t1-3], [self.t1-2], beta=self.beta, bidirectional=True)
56
+
57
+ if d.item() < self.keyframe_thresh:
58
+ self.graph.rm_keyframe(self.t1 - 2)
59
+
60
+ with self.video.get_lock():
61
+ self.video.counter.value -= 1
62
+ self.t1 -= 1
63
+
64
+ else:
65
+ for itr in range(self.iters2):
66
+ self.graph.update(None, None, use_inactive=True)
67
+
68
+ # set pose for next iteration
69
+ self.video.poses[self.t1] = self.video.poses[self.t1-1]
70
+ self.video.disps[self.t1] = self.video.disps[self.t1-1].mean()
71
+
72
+ # update visualization
73
+ self.video.dirty[self.graph.ii.min():self.t1] = True
74
+
75
+ def __initialize(self):
76
+ """ initialize the SLAM system """
77
+
78
+ self.t0 = 0
79
+ self.t1 = self.video.counter.value
80
+
81
+ self.graph.add_neighborhood_factors(self.t0, self.t1, r=3)
82
+
83
+ for itr in range(8):
84
+ self.graph.update(1, use_inactive=True)
85
+
86
+ self.graph.add_proximity_factors(0, 0, rad=2, nms=2, thresh=self.frontend_thresh, remove=False)
87
+
88
+ for itr in range(8):
89
+ self.graph.update(1, use_inactive=True)
90
+
91
+
92
+ # self.video.normalize()
93
+ self.video.poses[self.t1] = self.video.poses[self.t1-1].clone()
94
+ self.video.disps[self.t1] = self.video.disps[self.t1-4:self.t1].mean()
95
+
96
+ # initialization complete
97
+ self.is_initialized = True
98
+ self.last_pose = self.video.poses[self.t1-1].clone()
99
+ self.last_disp = self.video.disps[self.t1-1].clone()
100
+ self.last_time = self.video.tstamp[self.t1-1].clone()
101
+
102
+ with self.video.get_lock():
103
+ self.video.ready.value = 1
104
+ self.video.dirty[:self.t1] = True
105
+
106
+ self.graph.rm_factors(self.graph.ii < self.warmup-4, store=True)
107
+
108
+ def __call__(self):
109
+ """ main update """
110
+
111
+ # do initialization
112
+ if not self.is_initialized and self.video.counter.value == self.warmup:
113
+ self.__initialize()
114
+
115
+ # do update
116
+ elif self.is_initialized and self.t1 < self.video.counter.value:
117
+ self.__update()
118
+
119
+
thirdparty/DROID-SLAM/droid_slam/droid_net.py ADDED
@@ -0,0 +1,226 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from collections import OrderedDict
6
+
7
+ from modules.extractor import BasicEncoder
8
+ from modules.corr import CorrBlock
9
+ from modules.gru import ConvGRU
10
+ from modules.clipping import GradientClip
11
+
12
+ from lietorch import SE3
13
+ from geom.ba import BA
14
+
15
+ import geom.projective_ops as pops
16
+ from geom.graph_utils import graph_to_edge_list, keyframe_indicies
17
+
18
+ from torch_scatter import scatter_mean
19
+
20
+
21
+ def cvx_upsample(data, mask):
22
+ """ upsample pixel-wise transformation field """
23
+ batch, ht, wd, dim = data.shape
24
+ data = data.permute(0, 3, 1, 2)
25
+ mask = mask.view(batch, 1, 9, 8, 8, ht, wd)
26
+ mask = torch.softmax(mask, dim=2)
27
+
28
+ up_data = F.unfold(data, [3,3], padding=1)
29
+ up_data = up_data.view(batch, dim, 9, 1, 1, ht, wd)
30
+
31
+ up_data = torch.sum(mask * up_data, dim=2)
32
+ up_data = up_data.permute(0, 4, 2, 5, 3, 1)
33
+ up_data = up_data.reshape(batch, 8*ht, 8*wd, dim)
34
+
35
+ return up_data
36
+
37
+ def upsample_disp(disp, mask):
38
+ batch, num, ht, wd = disp.shape
39
+ disp = disp.view(batch*num, ht, wd, 1)
40
+ mask = mask.view(batch*num, -1, ht, wd)
41
+ return cvx_upsample(disp, mask).view(batch, num, 8*ht, 8*wd)
42
+
43
+
44
+ class GraphAgg(nn.Module):
45
+ def __init__(self):
46
+ super(GraphAgg, self).__init__()
47
+ self.conv1 = nn.Conv2d(128, 128, 3, padding=1)
48
+ self.conv2 = nn.Conv2d(128, 128, 3, padding=1)
49
+ self.relu = nn.ReLU(inplace=True)
50
+
51
+ self.eta = nn.Sequential(
52
+ nn.Conv2d(128, 1, 3, padding=1),
53
+ GradientClip(),
54
+ nn.Softplus())
55
+
56
+ self.upmask = nn.Sequential(
57
+ nn.Conv2d(128, 8*8*9, 1, padding=0))
58
+
59
+ def forward(self, net, ii):
60
+ batch, num, ch, ht, wd = net.shape
61
+ net = net.view(batch*num, ch, ht, wd)
62
+
63
+ _, ix = torch.unique(ii, return_inverse=True)
64
+ net = self.relu(self.conv1(net))
65
+
66
+ net = net.view(batch, num, 128, ht, wd)
67
+ net = scatter_mean(net, ix, dim=1)
68
+ net = net.view(-1, 128, ht, wd)
69
+
70
+ net = self.relu(self.conv2(net))
71
+
72
+ eta = self.eta(net).view(batch, -1, ht, wd)
73
+ upmask = self.upmask(net).view(batch, -1, 8*8*9, ht, wd)
74
+
75
+ return .01 * eta, upmask
76
+
77
+
78
+ class UpdateModule(nn.Module):
79
+ def __init__(self):
80
+ super(UpdateModule, self).__init__()
81
+ cor_planes = 4 * (2*3 + 1)**2
82
+
83
+ self.corr_encoder = nn.Sequential(
84
+ nn.Conv2d(cor_planes, 128, 1, padding=0),
85
+ nn.ReLU(inplace=True),
86
+ nn.Conv2d(128, 128, 3, padding=1),
87
+ nn.ReLU(inplace=True))
88
+
89
+ self.flow_encoder = nn.Sequential(
90
+ nn.Conv2d(4, 128, 7, padding=3),
91
+ nn.ReLU(inplace=True),
92
+ nn.Conv2d(128, 64, 3, padding=1),
93
+ nn.ReLU(inplace=True))
94
+
95
+ self.weight = nn.Sequential(
96
+ nn.Conv2d(128, 128, 3, padding=1),
97
+ nn.ReLU(inplace=True),
98
+ nn.Conv2d(128, 2, 3, padding=1),
99
+ GradientClip(),
100
+ nn.Sigmoid())
101
+
102
+ self.delta = nn.Sequential(
103
+ nn.Conv2d(128, 128, 3, padding=1),
104
+ nn.ReLU(inplace=True),
105
+ nn.Conv2d(128, 2, 3, padding=1),
106
+ GradientClip())
107
+
108
+ self.gru = ConvGRU(128, 128+128+64)
109
+ self.agg = GraphAgg()
110
+
111
+ def forward(self, net, inp, corr, flow=None, ii=None, jj=None, mask=None):
112
+ """ RaftSLAM update operator """
113
+
114
+ batch, num, ch, ht, wd = net.shape
115
+
116
+ if flow is None:
117
+ flow = torch.zeros(batch, num, 4, ht, wd, device=net.device)
118
+
119
+ output_dim = (batch, num, -1, ht, wd)
120
+ net = net.view(batch*num, -1, ht, wd)
121
+ inp = inp.view(batch*num, -1, ht, wd)
122
+ corr = corr.view(batch*num, -1, ht, wd)
123
+ flow = flow.view(batch*num, -1, ht, wd)
124
+
125
+ corr = self.corr_encoder(corr)
126
+ flow = self.flow_encoder(flow)
127
+ net = self.gru(net, inp, corr, flow)
128
+
129
+ ### update variables ###
130
+ delta = self.delta(net).view(*output_dim)
131
+ weight = self.weight(net).view(*output_dim)
132
+
133
+ # print('Update')
134
+ # print('delta:', delta.shape) # [1,1,2,64,48]
135
+ # print('weight:', weight.shape) # [1,1,2,64,48]
136
+
137
+ delta = delta.permute(0,1,3,4,2)[...,:2].contiguous()
138
+ weight = weight.permute(0,1,3,4,2)[...,:2].contiguous()
139
+
140
+ net = net.view(*output_dim)
141
+
142
+ if ii is not None:
143
+ eta, upmask = self.agg(net, ii.to(net.device))
144
+ return net, delta, weight, eta, upmask
145
+
146
+ else:
147
+ return net, delta, weight
148
+
149
+
150
+ class DroidNet(nn.Module):
151
+ def __init__(self):
152
+ super(DroidNet, self).__init__()
153
+ self.fnet = BasicEncoder(output_dim=128, norm_fn='instance')
154
+ self.cnet = BasicEncoder(output_dim=256, norm_fn='none')
155
+ self.update = UpdateModule()
156
+
157
+
158
+ def extract_features(self, images):
159
+ """ run feature extraction networks """
160
+
161
+ # normalize images
162
+ images = images[:, :, [2,1,0]] / 255.0
163
+ mean = torch.as_tensor([0.485, 0.456, 0.406], device=images.device)
164
+ std = torch.as_tensor([0.229, 0.224, 0.225], device=images.device)
165
+ images = images.sub_(mean[:, None, None]).div_(std[:, None, None])
166
+
167
+ fmaps = self.fnet(images)
168
+ net = self.cnet(images)
169
+
170
+ net, inp = net.split([128,128], dim=2)
171
+ net = torch.tanh(net)
172
+ inp = torch.relu(inp)
173
+ return fmaps, net, inp
174
+
175
+
176
+ def forward(self, Gs, images, disps, intrinsics, graph=None, num_steps=12, fixedp=2):
177
+ """ Estimates SE3 or Sim3 between pair of frames """
178
+
179
+ u = keyframe_indicies(graph)
180
+ ii, jj, kk = graph_to_edge_list(graph)
181
+
182
+ ii = ii.to(device=images.device, dtype=torch.long)
183
+ jj = jj.to(device=images.device, dtype=torch.long)
184
+
185
+ fmaps, net, inp = self.extract_features(images)
186
+ net, inp = net[:,ii], inp[:,ii]
187
+ corr_fn = CorrBlock(fmaps[:,ii], fmaps[:,jj], num_levels=4, radius=3)
188
+
189
+ ht, wd = images.shape[-2:]
190
+ coords0 = pops.coords_grid(ht//8, wd//8, device=images.device)
191
+
192
+ coords1, _ = pops.projective_transform(Gs, disps, intrinsics, ii, jj)
193
+ target = coords1.clone()
194
+
195
+ Gs_list, disp_list, residual_list = [], [], []
196
+ for step in range(num_steps):
197
+ Gs = Gs.detach()
198
+ disps = disps.detach()
199
+ coords1 = coords1.detach()
200
+ target = target.detach()
201
+
202
+ # extract motion features
203
+ corr = corr_fn(coords1)
204
+ resd = target - coords1
205
+ flow = coords1 - coords0
206
+
207
+ motion = torch.cat([flow, resd], dim=-1)
208
+ motion = motion.permute(0,1,4,2,3).clamp(-64.0, 64.0)
209
+
210
+ net, delta, weight, eta, upmask = \
211
+ self.update(net, inp, corr, motion, ii, jj)
212
+
213
+ target = coords1 + delta
214
+
215
+ for i in range(2):
216
+ Gs, disps = BA(target, weight, eta, Gs, disps, intrinsics, ii, jj, fixedp=2)
217
+
218
+ coords1, valid_mask = pops.projective_transform(Gs, disps, intrinsics, ii, jj)
219
+ residual = (target - coords1)
220
+
221
+ Gs_list.append(Gs)
222
+ disp_list.append(upsample_disp(disps, upmask))
223
+ residual_list.append(valid_mask * residual)
224
+
225
+
226
+ return Gs_list, disp_list, residual_list
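Spelled out, each of the num_steps iterations in forward is one round of flow revision followed by two bundle-adjustment solves (the notation is mine, chosen to mirror the variable names in the loop):

    \mathbf{p}^{(k)} = \Pi\big(G^{(k)}, d^{(k)}\big), \qquad
    \mathbf{f}^{(k)} = \mathbf{p}^{(k)} - \mathbf{p}_0, \qquad
    \mathbf{r}^{(k)} = \mathbf{t}^{(k)} - \mathbf{p}^{(k)}

    (\Delta, w, \eta) = \mathrm{Update}\big(\mathrm{corr}(\mathbf{p}^{(k)}), \mathbf{f}^{(k)}, \mathbf{r}^{(k)}\big), \qquad
    \mathbf{t}^{(k+1)} = \mathbf{p}^{(k)} + \Delta, \qquad
    \big(G^{(k+1)}, d^{(k+1)}\big) = \mathrm{BA}\big(\mathbf{t}^{(k+1)}, w, \eta, G^{(k)}, d^{(k)}\big)

where \Pi is projective_transform, \mathbf{t} is the flow target, and the entry appended to residual_list is \mathbf{t}^{(k+1)} - \mathbf{p}^{(k+1)} masked by the validity map.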
thirdparty/DROID-SLAM/droid_slam/factor_graph.py ADDED
@@ -0,0 +1,397 @@
1
+ import torch
2
+ import lietorch
3
+ import numpy as np
4
+
5
+ import matplotlib.pyplot as plt
6
+ from lietorch import SE3
7
+ from modules.corr import CorrBlock, AltCorrBlock
8
+ import geom.projective_ops as pops
9
+ from glob import glob
10
+
11
+ class FactorGraph:
12
+ def __init__(self, video, update_op, device="cuda:0", corr_impl="volume", max_factors=-1, upsample=False):
13
+ self.video = video
14
+ self.update_op = update_op
15
+ self.device = device
16
+ self.max_factors = max_factors
17
+ self.corr_impl = corr_impl
18
+ self.upsample = upsample
19
+
20
+ # operator at 1/8 resolution
21
+ self.ht = ht = video.ht // 8
22
+ self.wd = wd = video.wd // 8
23
+
24
+ self.coords0 = pops.coords_grid(ht, wd, device=device)
25
+ self.ii = torch.as_tensor([], dtype=torch.long, device=device)
26
+ self.jj = torch.as_tensor([], dtype=torch.long, device=device)
27
+ self.age = torch.as_tensor([], dtype=torch.long, device=device)
28
+
29
+ self.corr, self.net, self.inp = None, None, None
30
+ self.damping = 1e-6 * torch.ones_like(self.video.disps)
31
+
32
+ self.target = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
33
+ self.weight = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
34
+
35
+ # inactive factors
36
+ self.ii_inac = torch.as_tensor([], dtype=torch.long, device=device)
37
+ self.jj_inac = torch.as_tensor([], dtype=torch.long, device=device)
38
+ self.ii_bad = torch.as_tensor([], dtype=torch.long, device=device)
39
+ self.jj_bad = torch.as_tensor([], dtype=torch.long, device=device)
40
+
41
+ self.target_inac = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
42
+ self.weight_inac = torch.zeros([1, 0, ht, wd, 2], device=device, dtype=torch.float)
43
+
44
+ def __filter_repeated_edges(self, ii, jj):
45
+ """ remove duplicate edges """
46
+
47
+ keep = torch.zeros(ii.shape[0], dtype=torch.bool, device=ii.device)
48
+ eset = set(
49
+ [(i.item(), j.item()) for i, j in zip(self.ii, self.jj)] +
50
+ [(i.item(), j.item()) for i, j in zip(self.ii_inac, self.jj_inac)])
51
+
52
+ for k, (i, j) in enumerate(zip(ii, jj)):
53
+ keep[k] = (i.item(), j.item()) not in eset
54
+
55
+ return ii[keep], jj[keep]
56
+
57
+ def print_edges(self):
58
+ ii = self.ii.cpu().numpy()
59
+ jj = self.jj.cpu().numpy()
60
+
61
+ ix = np.argsort(ii)
62
+ ii = ii[ix]
63
+ jj = jj[ix]
64
+
65
+ w = torch.mean(self.weight, dim=[0,2,3,4]).cpu().numpy()
66
+ w = w[ix]
67
+ for e in zip(ii, jj, w):
68
+ print(e)
69
+ print()
70
+
71
+ def filter_edges(self):
72
+ """ remove bad edges """
73
+ conf = torch.mean(self.weight, dim=[0,2,3,4])
74
+ mask = (torch.abs(self.ii-self.jj) > 2) & (conf < 0.001)
75
+
76
+ self.ii_bad = torch.cat([self.ii_bad, self.ii[mask]])
77
+ self.jj_bad = torch.cat([self.jj_bad, self.jj[mask]])
78
+ self.rm_factors(mask, store=False)
79
+
80
+ def clear_edges(self):
81
+ self.rm_factors(self.ii >= 0)
82
+ self.net = None
83
+ self.inp = None
84
+
85
+ @torch.cuda.amp.autocast(enabled=True)
86
+ def add_factors(self, ii, jj, remove=False):
87
+ """ add edges to factor graph """
88
+
89
+ if not isinstance(ii, torch.Tensor):
90
+ ii = torch.as_tensor(ii, dtype=torch.long, device=self.device)
91
+
92
+ if not isinstance(jj, torch.Tensor):
93
+ jj = torch.as_tensor(jj, dtype=torch.long, device=self.device)
94
+
95
+ # remove duplicate edges
96
+ ii, jj = self.__filter_repeated_edges(ii, jj)
97
+
98
+
99
+ if ii.shape[0] == 0:
100
+ return
101
+
102
+ # place limit on number of factors
103
+ if self.max_factors > 0 and self.ii.shape[0] + ii.shape[0] > self.max_factors \
104
+ and self.corr is not None and remove:
105
+
106
+ ix = torch.arange(len(self.age))[torch.argsort(self.age).cpu()]
107
+ self.rm_factors(ix >= self.max_factors - ii.shape[0], store=True)
108
+
109
+ net = self.video.nets[ii].to(self.device).unsqueeze(0)
110
+
111
+ # correlation volume for new edges
112
+ if self.corr_impl == "volume":
113
+ c = (ii == jj).long()
114
+ fmap1 = self.video.fmaps[ii,0].to(self.device).unsqueeze(0)
115
+ fmap2 = self.video.fmaps[jj,c].to(self.device).unsqueeze(0)
116
+ corr = CorrBlock(fmap1, fmap2)
117
+ self.corr = corr if self.corr is None else self.corr.cat(corr)
118
+
119
+ inp = self.video.inps[ii].to(self.device).unsqueeze(0)
120
+ self.inp = inp if self.inp is None else torch.cat([self.inp, inp], 1)
121
+
122
+ with torch.cuda.amp.autocast(enabled=False):
123
+ target, _ = self.video.reproject(ii, jj)
124
+ weight = torch.zeros_like(target)
125
+
126
+ self.ii = torch.cat([self.ii, ii], 0)
127
+ self.jj = torch.cat([self.jj, jj], 0)
128
+ self.age = torch.cat([self.age, torch.zeros_like(ii)], 0)
129
+
130
+ # reprojection factors
131
+ self.net = net if self.net is None else torch.cat([self.net, net], 1)
132
+
133
+ self.target = torch.cat([self.target, target], 1)
134
+ self.weight = torch.cat([self.weight, weight], 1)
135
+
136
+ @torch.cuda.amp.autocast(enabled=True)
137
+ def rm_factors(self, mask, store=False):
138
+ """ drop edges from factor graph """
139
+
140
+ # store estimated factors
141
+ if store:
142
+ self.ii_inac = torch.cat([self.ii_inac, self.ii[mask]], 0)
143
+ self.jj_inac = torch.cat([self.jj_inac, self.jj[mask]], 0)
144
+ self.target_inac = torch.cat([self.target_inac, self.target[:,mask]], 1)
145
+ self.weight_inac = torch.cat([self.weight_inac, self.weight[:,mask]], 1)
146
+
147
+ self.ii = self.ii[~mask]
148
+ self.jj = self.jj[~mask]
149
+ self.age = self.age[~mask]
150
+
151
+ if self.corr_impl == "volume":
152
+ self.corr = self.corr[~mask]
153
+
154
+ if self.net is not None:
155
+ self.net = self.net[:,~mask]
156
+
157
+ if self.inp is not None:
158
+ self.inp = self.inp[:,~mask]
159
+
160
+ self.target = self.target[:,~mask]
161
+ self.weight = self.weight[:,~mask]
162
+
163
+
164
+ @torch.cuda.amp.autocast(enabled=True)
165
+ def rm_keyframe(self, ix):
166
+ """ drop the keyframe at index ix and remap its edges """
167
+
168
+
169
+ with self.video.get_lock():
170
+ self.video.images[ix] = self.video.images[ix+1]
171
+ self.video.poses[ix] = self.video.poses[ix+1]
172
+ self.video.disps[ix] = self.video.disps[ix+1]
173
+ self.video.disps_sens[ix] = self.video.disps_sens[ix+1]
174
+ self.video.intrinsics[ix] = self.video.intrinsics[ix+1]
175
+
176
+ self.video.nets[ix] = self.video.nets[ix+1]
177
+ self.video.inps[ix] = self.video.inps[ix+1]
178
+ self.video.fmaps[ix] = self.video.fmaps[ix+1]
179
+ self.video.tstamp[ix] = self.video.tstamp[ix+1]
180
+ self.video.masks[ix] = self.video.masks[ix+1]
181
+
182
+ m = (self.ii_inac == ix) | (self.jj_inac == ix)
183
+ self.ii_inac[self.ii_inac >= ix] -= 1
184
+ self.jj_inac[self.jj_inac >= ix] -= 1
185
+
186
+ if torch.any(m):
187
+ self.ii_inac = self.ii_inac[~m]
188
+ self.jj_inac = self.jj_inac[~m]
189
+ self.target_inac = self.target_inac[:,~m]
190
+ self.weight_inac = self.weight_inac[:,~m]
191
+
192
+ m = (self.ii == ix) | (self.jj == ix)
193
+
194
+ self.ii[self.ii >= ix] -= 1
195
+ self.jj[self.jj >= ix] -= 1
196
+ self.rm_factors(m, store=False)
197
+
198
+
199
+ @torch.cuda.amp.autocast(enabled=True)
200
+ def update(self, t0=None, t1=None, itrs=3, use_inactive=False, EP=1e-7, motion_only=False):
201
+ """ run update operator on factor graph """
202
+
203
+ # motion features
204
+ with torch.cuda.amp.autocast(enabled=False):
205
+ coords1, mask = self.video.reproject(self.ii, self.jj)
206
+ motn = torch.cat([coords1 - self.coords0, self.target - coords1], dim=-1)
207
+ motn = motn.permute(0,1,4,2,3).clamp(-64.0, 64.0)
208
+
209
+ # correlation features
210
+ corr = self.corr(coords1)
211
+ self.net, delta, weight, damping, upmask = \
212
+ self.update_op(self.net, self.inp, corr, motn, self.ii, self.jj)
213
+
214
+ ##### save confidence weight for vis #####
215
+ # for k in range(len(self.ii)):
216
+ # w = weight[:, k].detach().cpu().numpy()
217
+ # idx_i = self.ii[k]
218
+ # idx_j = self.jj[k]
219
+ # np.save(f'pred_conf/{idx_i:04d}_{idx_j:04d}.npy', w)
220
+ #############################################
221
+
222
+ # Shapes:
223
+ # weight: [1, k, h//8, w//8, 2]
224
+ # self.ii: [k]; self.jj: [k]
225
+ msk = self.video.masks[self.ii] > 0
226
+ weight[:,msk] = 0.0
227
+
228
+ if t0 is None:
229
+ t0 = max(1, self.ii.min().item()+1)
230
+
231
+ with torch.cuda.amp.autocast(enabled=False):
232
+ self.target = coords1 + delta.to(dtype=torch.float)
233
+ self.weight = weight.to(dtype=torch.float)
234
+
235
+ ht, wd = self.coords0.shape[0:2]
236
+ self.damping[torch.unique(self.ii)] = damping
237
+
238
+ if use_inactive:
239
+ m = (self.ii_inac >= t0 - 3) & (self.jj_inac >= t0 - 3)
240
+ ii = torch.cat([self.ii_inac[m], self.ii], 0)
241
+ jj = torch.cat([self.jj_inac[m], self.jj], 0)
242
+ target = torch.cat([self.target_inac[:,m], self.target], 1)
243
+ weight = torch.cat([self.weight_inac[:,m], self.weight], 1)
244
+
245
+ else:
246
+ ii, jj, target, weight = self.ii, self.jj, self.target, self.weight
247
+
248
+
249
+ damping = .2 * self.damping[torch.unique(ii)].contiguous() + EP
250
+
251
+ target = target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
252
+ weight = weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
253
+
254
+ # dense bundle adjustment
255
+ self.video.ba(target, weight, damping, ii, jj, t0, t1,
256
+ itrs=itrs, lm=1e-4, ep=0.1, motion_only=motion_only)
257
+
258
+ if self.upsample:
259
+ self.video.upsample(torch.unique(self.ii), upmask)
260
+
261
+ self.age += 1
262
+
263
+
264
+ @torch.cuda.amp.autocast(enabled=False)
265
+ def update_lowmem(self, t0=None, t1=None, itrs=2, use_inactive=False, EP=1e-7, steps=8):
266
+ """ run update operator on factor graph - reduced memory implementation """
267
+
268
+ # alternate corr implementation
269
+ t = self.video.counter.value
270
+
271
+ num, rig, ch, ht, wd = self.video.fmaps.shape
272
+ corr_op = AltCorrBlock(self.video.fmaps.view(1, num*rig, ch, ht, wd))
273
+
274
+ print("Global BA Iteration with {} steps".format(steps))
275
+ for step in range(steps):
276
+ # print("Global BA Iteration #{}".format(step+1))
277
+ with torch.cuda.amp.autocast(enabled=False):
278
+ coords1, mask = self.video.reproject(self.ii, self.jj)
279
+ motn = torch.cat([coords1 - self.coords0, self.target - coords1], dim=-1)
280
+ motn = motn.permute(0,1,4,2,3).clamp(-64.0, 64.0)
281
+
282
+ s = 8
283
+ for i in range(0, self.jj.max()+1, s):
284
+ v = (self.ii >= i) & (self.ii < i + s)
285
+ iis = self.ii[v]
286
+ jjs = self.jj[v]
287
+
288
+ ht, wd = self.coords0.shape[0:2]
289
+ corr1 = corr_op(coords1[:,v], rig * iis, rig * jjs + (iis == jjs).long())
290
+
291
+ with torch.cuda.amp.autocast(enabled=True):
292
+
293
+ net, delta, weight, damping, upmask = \
294
+ self.update_op(self.net[:,v], self.video.inps[None,iis], corr1, motn[:,v], iis, jjs)
295
+
296
+ if self.upsample:
297
+ self.video.upsample(torch.unique(iis), upmask)
298
+
299
+ # Shapes:
300
+ # weight: [1, k, h//8, w//8, 2]
301
+ # self.ii: [k]; self.jj: [k]
302
+ msk = self.video.masks[iis] > 0
303
+ weight[:,msk] = 0.0
304
+
305
+ self.net[:,v] = net
306
+ self.target[:,v] = coords1[:,v] + delta.float()
307
+ self.weight[:,v] = weight.float()
308
+ self.damping[torch.unique(iis)] = damping
309
+
310
+ damping = .2 * self.damping[torch.unique(self.ii)].contiguous() + EP
311
+ target = self.target.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
312
+ weight = self.weight.view(-1, ht, wd, 2).permute(0,3,1,2).contiguous()
313
+
314
+ # dense bundle adjustment
315
+ self.video.ba(target, weight, damping, self.ii, self.jj, 1, t,
316
+ itrs=itrs, lm=1e-5, ep=1e-2, motion_only=False)
317
+
318
+ self.video.dirty[:t] = True
319
+
320
+ def add_neighborhood_factors(self, t0, t1, r=3):
321
+ """ add edges between neighboring frames within radius r """
322
+
323
+ ii, jj = torch.meshgrid(torch.arange(t0,t1), torch.arange(t0,t1), indexing='ij')
324
+ ii = ii.reshape(-1).to(dtype=torch.long, device=self.device)
325
+ jj = jj.reshape(-1).to(dtype=torch.long, device=self.device)
326
+
327
+ c = 1 if self.video.stereo else 0
328
+
329
+ keep = ((ii - jj).abs() > c) & ((ii - jj).abs() <= r)
330
+ self.add_factors(ii[keep], jj[keep])
331
+
332
+
333
+ def add_proximity_factors(self, t0=0, t1=0, rad=2, nms=2, beta=0.25, thresh=16.0, remove=False):
334
+ """ add edges to the factor graph based on distance """
335
+
336
+ t = self.video.counter.value
337
+ ix = torch.arange(t0, t)
338
+ jx = torch.arange(t1, t)
339
+
340
+ ii, jj = torch.meshgrid(ix, jx, indexing='ij')
341
+ ii = ii.reshape(-1)
342
+ jj = jj.reshape(-1)
343
+
344
+ d = self.video.distance(ii, jj, beta=beta)
345
+ d[ii - rad < jj] = np.inf
346
+ d[d > 100] = np.inf
347
+
348
+ ii1 = torch.cat([self.ii, self.ii_bad, self.ii_inac], 0)
349
+ jj1 = torch.cat([self.jj, self.jj_bad, self.jj_inac], 0)
350
+ for i, j in zip(ii1.cpu().numpy(), jj1.cpu().numpy()):
351
+ for di in range(-nms, nms+1):
352
+ for dj in range(-nms, nms+1):
353
+ if abs(di) + abs(dj) <= max(min(abs(i-j)-2, nms), 0):
354
+ i1 = i + di
355
+ j1 = j + dj
356
+
357
+ if (t0 <= i1 < t) and (t1 <= j1 < t):
358
+ d[(i1-t0)*(t-t1) + (j1-t1)] = np.inf
359
+
360
+
361
+ es = []
362
+ for i in range(t0, t):
363
+ if self.video.stereo:
364
+ es.append((i, i))
365
+ d[(i-t0)*(t-t1) + (i-t1)] = np.inf
366
+
367
+ for j in range(max(i-rad-1,0), i):
368
+ es.append((i,j))
369
+ es.append((j,i))
370
+ d[(i-t0)*(t-t1) + (j-t1)] = np.inf
371
+
372
+ ix = torch.argsort(d)
373
+ for k in ix:
374
+ if d[k].item() > thresh:
375
+ continue
376
+
377
+ if len(es) > self.max_factors:
378
+ break
379
+
380
+ i = ii[k]
381
+ j = jj[k]
382
+
383
+ # bidirectional
384
+ es.append((i, j))
385
+ es.append((j, i))
386
+
387
+ for di in range(-nms, nms+1):
388
+ for dj in range(-nms, nms+1):
389
+ if abs(di) + abs(dj) <= max(min(abs(i-j)-2, nms), 0):
390
+ i1 = i + di
391
+ j1 = j + dj
392
+
393
+ if (t0 <= i1 < t) and (t1 <= j1 < t):
394
+ d[(i1-t0)*(t-t1) + (j1-t1)] = np.inf
395
+
396
+ ii, jj = torch.as_tensor(es, device=self.device).unbind(dim=-1)
397
+ self.add_factors(ii, jj, remove)
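One indexing detail that is easy to miss when reading add_proximity_factors: d is the flattened (t - t_0) x (t - t_1) distance matrix over candidate pairs, so suppressing the neighbourhood of an accepted or existing edge (i, j) writes

    d\big[(i_1 - t_0)(t - t_1) + (j_1 - t_1)\big] = \infty

for every (i_1, j_1) within the nms radius, which is the expression appearing in both suppression loops above.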
thirdparty/DROID-SLAM/droid_slam/geom/__init__.py ADDED
File without changes
thirdparty/DROID-SLAM/droid_slam/geom/ba.py ADDED
@@ -0,0 +1,158 @@
1
+ import lietorch
2
+ import torch
3
+ import torch.nn.functional as F
4
+
5
+ from .chol import block_solve, schur_solve
6
+ import geom.projective_ops as pops
7
+
8
+ from torch_scatter import scatter_sum
9
+
10
+
11
+ # utility functions for scattering ops
12
+ def safe_scatter_add_mat(A, ii, jj, n, m):
13
+ v = (ii >= 0) & (jj >= 0) & (ii < n) & (jj < m)
14
+ return scatter_sum(A[:,v], ii[v]*m + jj[v], dim=1, dim_size=n*m)
15
+
16
+ def safe_scatter_add_vec(b, ii, n):
17
+ v = (ii >= 0) & (ii < n)
18
+ return scatter_sum(b[:,v], ii[v], dim=1, dim_size=n)
19
+
20
+ # apply retraction operator to inv-depth maps
21
+ def disp_retr(disps, dz, ii):
22
+ ii = ii.to(device=dz.device)
23
+ return disps + scatter_sum(dz, ii, dim=1, dim_size=disps.shape[1])
24
+
25
+ # apply retraction operator to poses
26
+ def pose_retr(poses, dx, ii):
27
+ ii = ii.to(device=dx.device)
28
+ return poses.retr(scatter_sum(dx, ii, dim=1, dim_size=poses.shape[1]))
29
+
30
+
31
+ def BA(target, weight, eta, poses, disps, intrinsics, ii, jj, fixedp=1, rig=1):
32
+ """ Full Bundle Adjustment """
33
+
34
+ B, P, ht, wd = disps.shape
35
+ N = ii.shape[0]
36
+ D = poses.manifold_dim
37
+
38
+ ### 1: compute jacobians and residuals ###
39
+ coords, valid, (Ji, Jj, Jz) = pops.projective_transform(
40
+ poses, disps, intrinsics, ii, jj, jacobian=True)
41
+
42
+ r = (target - coords).view(B, N, -1, 1)
43
+ w = .001 * (valid * weight).view(B, N, -1, 1)
44
+
45
+ ### 2: construct linear system ###
46
+ Ji = Ji.reshape(B, N, -1, D)
47
+ Jj = Jj.reshape(B, N, -1, D)
48
+ wJiT = (w * Ji).transpose(2,3)
49
+ wJjT = (w * Jj).transpose(2,3)
50
+
51
+ Jz = Jz.reshape(B, N, ht*wd, -1)
52
+
53
+ Hii = torch.matmul(wJiT, Ji)
54
+ Hij = torch.matmul(wJiT, Jj)
55
+ Hji = torch.matmul(wJjT, Ji)
56
+ Hjj = torch.matmul(wJjT, Jj)
57
+
58
+ vi = torch.matmul(wJiT, r).squeeze(-1)
59
+ vj = torch.matmul(wJjT, r).squeeze(-1)
60
+
61
+ Ei = (wJiT.view(B,N,D,ht*wd,-1) * Jz[:,:,None]).sum(dim=-1)
62
+ Ej = (wJjT.view(B,N,D,ht*wd,-1) * Jz[:,:,None]).sum(dim=-1)
63
+
64
+ w = w.view(B, N, ht*wd, -1)
65
+ r = r.view(B, N, ht*wd, -1)
66
+ wk = torch.sum(w*r*Jz, dim=-1)
67
+ Ck = torch.sum(w*Jz*Jz, dim=-1)
68
+
69
+ kx, kk = torch.unique(ii, return_inverse=True)
70
+ M = kx.shape[0]
71
+
72
+ # only optimize keyframe poses
73
+ P = P // rig - fixedp
74
+ ii = ii // rig - fixedp
75
+ jj = jj // rig - fixedp
76
+
77
+ H = safe_scatter_add_mat(Hii, ii, ii, P, P) + \
78
+ safe_scatter_add_mat(Hij, ii, jj, P, P) + \
79
+ safe_scatter_add_mat(Hji, jj, ii, P, P) + \
80
+ safe_scatter_add_mat(Hjj, jj, jj, P, P)
81
+
82
+ E = safe_scatter_add_mat(Ei, ii, kk, P, M) + \
83
+ safe_scatter_add_mat(Ej, jj, kk, P, M)
84
+
85
+ v = safe_scatter_add_vec(vi, ii, P) + \
86
+ safe_scatter_add_vec(vj, jj, P)
87
+
88
+ C = safe_scatter_add_vec(Ck, kk, M)
89
+ w = safe_scatter_add_vec(wk, kk, M)
90
+
91
+ C = C + eta.view(*C.shape) + 1e-7
92
+
93
+ H = H.view(B, P, P, D, D)
94
+ E = E.view(B, P, M, D, ht*wd)
95
+
96
+ ### 3: solve the system ###
97
+ dx, dz = schur_solve(H, E, C, v, w)
98
+
99
+ ### 4: apply retraction ###
100
+ poses = pose_retr(poses, dx, torch.arange(P) + fixedp)
101
+ disps = disp_retr(disps, dz.view(B,-1,ht,wd), kx)
102
+
103
+ disps = torch.where(disps > 10, torch.zeros_like(disps), disps)
104
+ disps = disps.clamp(min=0.0)
105
+
106
+ return poses, disps
107
+
108
+
109
+ def MoBA(target, weight, eta, poses, disps, intrinsics, ii, jj, fixedp=1, rig=1):
110
+ """ Motion only bundle adjustment """
111
+
112
+ B, P, ht, wd = disps.shape
113
+ N = ii.shape[0]
114
+ D = poses.manifold_dim
115
+
116
+ ### 1: compute jacobians and residuals ###
117
+ coords, valid, (Ji, Jj, Jz) = pops.projective_transform(
118
+ poses, disps, intrinsics, ii, jj, jacobian=True)
119
+
120
+ r = (target - coords).view(B, N, -1, 1)
121
+ w = .001 * (valid * weight).view(B, N, -1, 1)
122
+
123
+ ### 2: construct linear system ###
124
+ Ji = Ji.reshape(B, N, -1, D)
125
+ Jj = Jj.reshape(B, N, -1, D)
126
+ wJiT = (w * Ji).transpose(2,3)
127
+ wJjT = (w * Jj).transpose(2,3)
128
+
129
+ Hii = torch.matmul(wJiT, Ji)
130
+ Hij = torch.matmul(wJiT, Jj)
131
+ Hji = torch.matmul(wJjT, Ji)
132
+ Hjj = torch.matmul(wJjT, Jj)
133
+
134
+ vi = torch.matmul(wJiT, r).squeeze(-1)
135
+ vj = torch.matmul(wJjT, r).squeeze(-1)
136
+
137
+ # only optimize keyframe poses
138
+ P = P // rig - fixedp
139
+ ii = ii // rig - fixedp
140
+ jj = jj // rig - fixedp
141
+
142
+ H = safe_scatter_add_mat(Hii, ii, ii, P, P) + \
143
+ safe_scatter_add_mat(Hij, ii, jj, P, P) + \
144
+ safe_scatter_add_mat(Hji, jj, ii, P, P) + \
145
+ safe_scatter_add_mat(Hjj, jj, jj, P, P)
146
+
147
+ v = safe_scatter_add_vec(vi, ii, P) + \
148
+ safe_scatter_add_vec(vj, jj, P)
149
+
150
+ H = H.view(B, P, P, D, D)
151
+
152
+ ### 3: solve the system ###
153
+ dx = block_solve(H, v)
154
+
155
+ ### 4: apply retraction ###
156
+ poses = pose_retr(poses, dx, torch.arange(P) + fixedp)
157
+ return poses
158
+
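For orientation, the system assembled above has the standard two-block bundle-adjustment structure, with H gathering the pose-pose blocks Hii, Hij, Hji, Hjj via the scatter ops, E gathering the pose-depth couplings Ei, Ej, C the pixel-wise diagonal term Ck plus the eta damping, and v, w the right-hand sides (this is the textbook form restated in the code's variable names, not a quotation from the paper):

    \begin{bmatrix} H & E \\ E^{\top} & C \end{bmatrix}
    \begin{bmatrix} \Delta\xi \\ \Delta z \end{bmatrix}
    =
    \begin{bmatrix} v \\ w \end{bmatrix}

Because C is diagonal over pixels, the depth block can be eliminated cheaply; that elimination is exactly what schur_solve in chol.py (the next file) performs.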
thirdparty/DROID-SLAM/droid_slam/geom/chol.py ADDED
@@ -0,0 +1,73 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import geom.projective_ops as pops
4
+
5
+ class CholeskySolver(torch.autograd.Function):
6
+ @staticmethod
7
+ def forward(ctx, H, b):
8
+ # don't crash training if cholesky decomp fails
9
+ try:
10
+ U = torch.linalg.cholesky(H)
11
+ xs = torch.cholesky_solve(b, U)
12
+ ctx.save_for_backward(U, xs)
13
+ ctx.failed = False
14
+ except Exception as e:
15
+ print(e)
16
+ ctx.failed = True
17
+ xs = torch.zeros_like(b)
18
+
19
+ return xs
20
+
21
+ @staticmethod
22
+ def backward(ctx, grad_x):
23
+ if ctx.failed:
24
+ return None, None
25
+
26
+ U, xs = ctx.saved_tensors
27
+ dz = torch.cholesky_solve(grad_x, U)
28
+ dH = -torch.matmul(xs, dz.transpose(-1,-2))
29
+
30
+ return dH, dz
31
+
32
+ def block_solve(H, b, ep=0.1, lm=0.0001):
33
+ """ solve normal equations """
34
+ B, N, _, D, _ = H.shape
35
+ I = torch.eye(D).to(H.device)
36
+ H = H + (ep + lm*H) * I
37
+
38
+ H = H.permute(0,1,3,2,4)
39
+ H = H.reshape(B, N*D, N*D)
40
+ b = b.reshape(B, N*D, 1)
41
+
42
+ x = CholeskySolver.apply(H,b)
43
+ return x.reshape(B, N, D)
44
+
45
+
46
+ def schur_solve(H, E, C, v, w, ep=0.1, lm=0.0001, sless=False):
47
+ """ solve using Schur complement """
48
+
49
+ B, P, M, D, HW = E.shape
50
+ H = H.permute(0,1,3,2,4).reshape(B, P*D, P*D)
51
+ E = E.permute(0,1,3,2,4).reshape(B, P*D, M*HW)
52
+ Q = (1.0 / C).view(B, M*HW, 1)
53
+
54
+ # damping
55
+ I = torch.eye(P*D).to(H.device)
56
+ H = H + (ep + lm*H) * I
57
+
58
+ v = v.reshape(B, P*D, 1)
59
+ w = w.reshape(B, M*HW, 1)
60
+
61
+ Et = E.transpose(1,2)
62
+ S = H - torch.matmul(E, Q*Et)
63
+ v = v - torch.matmul(E, Q*w)
64
+
65
+ dx = CholeskySolver.apply(S, v)
66
+ if sless:
67
+ return dx.reshape(B, P, D)
68
+
69
+ dz = Q * (w - Et @ dx)
70
+ dx = dx.reshape(B, P, D)
71
+ dz = dz.reshape(B, M, HW)
72
+
73
+ return dx, dz
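The elimination in schur_solve mirrors the algebra line for line (Q and S are the names used above; epsilon and lambda are the ep and lm damping arguments):

    Q = C^{-1}, \qquad
    S = H - E\,Q\,E^{\top}, \qquad
    S\,\Delta\xi = v - E\,Q\,w, \qquad
    \Delta z = Q\,\big(w - E^{\top}\Delta\xi\big)

with the damping H \leftarrow H + (\epsilon + \lambda H)\,I applied first and the reduced pose system handed to the CholeskySolver defined at the top of the file.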
thirdparty/DROID-SLAM/droid_slam/geom/graph_utils.py ADDED
@@ -0,0 +1,113 @@
1
+
2
+ import torch
3
+ import numpy as np
4
+ from collections import OrderedDict
5
+
6
+ import lietorch
7
+ from data_readers.rgbd_utils import compute_distance_matrix_flow, compute_distance_matrix_flow2
8
+
9
+
10
+ def graph_to_edge_list(graph):
11
+ ii, jj, kk = [], [], []
12
+ for s, u in enumerate(graph):
13
+ for v in graph[u]:
14
+ ii.append(u)
15
+ jj.append(v)
16
+ kk.append(s)
17
+
18
+ ii = torch.as_tensor(ii)
19
+ jj = torch.as_tensor(jj)
20
+ kk = torch.as_tensor(kk)
21
+ return ii, jj, kk
22
+
23
+ def keyframe_indicies(graph):
24
+ return torch.as_tensor([u for u in graph])
25
+
26
+ def meshgrid(m, n, device='cuda'):
27
+ ii, jj = torch.meshgrid(torch.arange(m), torch.arange(n), indexing='ij')
28
+ return ii.reshape(-1).to(device), jj.reshape(-1).to(device)
29
+
30
+ def neighbourhood_graph(n, r):
31
+ ii, jj = meshgrid(n, n)
32
+ d = (ii - jj).abs()
33
+ keep = (d >= 1) & (d <= r)
34
+ return ii[keep], jj[keep]
35
+
36
+
37
+ def build_frame_graph(poses, disps, intrinsics, num=16, thresh=24.0, r=2):
38
+ """ construct a frame graph between co-visible frames """
39
+ N = poses.shape[1]
40
+ poses = poses[0].cpu().numpy()
41
+ disps = disps[0][:,3::8,3::8].cpu().numpy()
42
+ intrinsics = intrinsics[0].cpu().numpy() / 8.0
43
+ d = compute_distance_matrix_flow(poses, disps, intrinsics)
44
+
45
+ count = 0
46
+ graph = OrderedDict()
47
+
48
+ for i in range(N):
49
+ graph[i] = []
50
+ d[i,i] = np.inf
51
+ for j in range(i-r, i+r+1):
52
+ if 0 <= j < N and i != j:
53
+ graph[i].append(j)
54
+ d[i,j] = np.inf
55
+ count += 1
56
+
57
+ while count < num:
58
+ ix = np.argmin(d)
59
+ i, j = ix // N, ix % N
60
+
61
+ if d[i,j] < thresh:
62
+ graph[i].append(j)
63
+ d[i,j] = np.inf
64
+ count += 1
65
+ else:
66
+ break
67
+
68
+ return graph
69
+
70
+
71
+
72
+ def build_frame_graph_v2(poses, disps, intrinsics, num=16, thresh=24.0, r=2):
73
+ """ construct a frame graph between co-visible frames """
74
+ N = poses.shape[1]
75
+ # poses = poses[0].cpu().numpy()
76
+ # disps = disps[0].cpu().numpy()
77
+ # intrinsics = intrinsics[0].cpu().numpy()
78
+ d = compute_distance_matrix_flow2(poses, disps, intrinsics)
79
+
80
+ # import matplotlib.pyplot as plt
81
+ # plt.imshow(d)
82
+ # plt.show()
83
+
84
+ count = 0
85
+ graph = OrderedDict()
86
+
87
+ for i in range(N):
88
+ graph[i] = []
89
+ d[i,i] = np.inf
90
+ for j in range(i-r, i+r+1):
91
+ if 0 <= j < N and i != j:
92
+ graph[i].append(j)
93
+ d[i,j] = np.inf
94
+ count += 1
95
+
96
+ while 1:
97
+ ix = np.argmin(d)
98
+ i, j = ix // N, ix % N
99
+
100
+ if d[i,j] < thresh:
101
+ graph[i].append(j)
102
+
103
+ for i1 in range(i-1, i+2):
104
+ for j1 in range(j-1, j+2):
105
+ if 0 <= i1 < N and 0 <= j1 < N:
106
+ d[i1, j1] = np.inf
107
+
108
+ count += 1
109
+ else:
110
+ break
111
+
112
+ return graph
113
+
thirdparty/DROID-SLAM/droid_slam/geom/losses.py ADDED
@@ -0,0 +1,118 @@
1
+ from collections import OrderedDict
2
+ import numpy as np
3
+ import torch
4
+ from lietorch import SO3, SE3, Sim3
5
+ from .graph_utils import graph_to_edge_list
6
+ from .projective_ops import projective_transform
7
+
8
+
9
+ def pose_metrics(dE):
10
+ """ Translation/Rotation/Scaling metrics from Sim3 """
11
+ t, q, s = dE.data.split([3, 4, 1], -1)
12
+ ang = SO3(q).log().norm(dim=-1)
13
+
14
+ # convert radians to degrees
15
+ r_err = (180 / np.pi) * ang
16
+ t_err = t.norm(dim=-1)
17
+ s_err = (s - 1.0).abs()
18
+ return r_err, t_err, s_err
19
+
20
+
21
+ def fit_scale(Ps, Gs):
22
+ b = Ps.shape[0]
23
+ t1 = Ps.data[...,:3].detach().reshape(b, -1)
24
+ t2 = Gs.data[...,:3].detach().reshape(b, -1)
25
+
26
+ s = (t1*t2).sum(-1) / ((t2*t2).sum(-1) + 1e-8)
27
+ return s
28
+
29
+
30
+ def geodesic_loss(Ps, Gs, graph, gamma=0.9, do_scale=True):
31
+ """ Loss function for training network """
32
+
33
+ # relative pose
34
+ ii, jj, kk = graph_to_edge_list(graph)
35
+ dP = Ps[:,jj] * Ps[:,ii].inv()
36
+
37
+ n = len(Gs)
38
+ geodesic_loss = 0.0
39
+
40
+ for i in range(n):
41
+ w = gamma ** (n - i - 1)
42
+ dG = Gs[i][:,jj] * Gs[i][:,ii].inv()
43
+
44
+ if do_scale:
45
+ s = fit_scale(dP, dG)
46
+ dG = dG.scale(s[:,None])
47
+
48
+ # pose error
49
+ d = (dG * dP.inv()).log()
50
+
51
+ if isinstance(dG, SE3):
52
+ tau, phi = d.split([3,3], dim=-1)
53
+ geodesic_loss += w * (
54
+ tau.norm(dim=-1).mean() +
55
+ phi.norm(dim=-1).mean())
56
+
57
+ elif isinstance(dG, Sim3):
58
+ tau, phi, sig = d.split([3,3,1], dim=-1)
59
+ geodesic_loss += w * (
60
+ tau.norm(dim=-1).mean() +
61
+ phi.norm(dim=-1).mean() +
62
+ 0.05 * sig.norm(dim=-1).mean())
63
+
64
+ dE = Sim3(dG * dP.inv()).detach()
65
+ r_err, t_err, s_err = pose_metrics(dE)
66
+
67
+ metrics = {
68
+ 'rot_error': r_err.mean().item(),
69
+ 'tr_error': t_err.mean().item(),
70
+ 'bad_rot': (r_err < .1).float().mean().item(),
71
+ 'bad_tr': (t_err < .01).float().mean().item(),
72
+ }
73
+
74
+ return geodesic_loss, metrics
75
+
76
+
77
+ def residual_loss(residuals, gamma=0.9):
78
+ """ loss on system residuals """
79
+ residual_loss = 0.0
80
+ n = len(residuals)
81
+
82
+ for i in range(n):
83
+ w = gamma ** (n - i - 1)
84
+ residual_loss += w * residuals[i].abs().mean()
85
+
86
+ return residual_loss, {'residual': residual_loss.item()}
87
+
88
+
89
+ def flow_loss(Ps, disps, poses_est, disps_est, intrinsics, graph, gamma=0.9):
90
+ """ optical flow loss """
91
+
92
+ N = Ps.shape[1]
93
+ graph = OrderedDict()
94
+ for i in range(N):
95
+ graph[i] = [j for j in range(N) if abs(i-j)==1]
96
+
97
+ ii, jj, kk = graph_to_edge_list(graph)
98
+ coords0, val0 = projective_transform(Ps, disps, intrinsics, ii, jj)
99
+ val0 = val0 * (disps[:,ii] > 0).float().unsqueeze(dim=-1)
100
+
101
+ n = len(poses_est)
102
+ flow_loss = 0.0
103
+
104
+ for i in range(n):
105
+ w = gamma ** (n - i - 1)
106
+ coords1, val1 = projective_transform(poses_est[i], disps_est[i], intrinsics, ii, jj)
107
+
108
+ v = (val0 * val1).squeeze(dim=-1)
109
+ epe = v * (coords1 - coords0).norm(dim=-1)
110
+ flow_loss += w * epe.mean()
111
+
112
+ epe = epe.reshape(-1)[v.reshape(-1) > 0.5]
113
+ metrics = {
114
+ 'f_error': epe.mean().item(),
115
+ '1px': (epe<1.0).float().mean().item(),
116
+ }
117
+
118
+ return flow_loss, metrics
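In symbols, geodesic_loss applies the usual exponentially decayed weighting over the n intermediate pose estimates (notation mine; the per-edge means are written as norms for brevity):

    \mathcal{L}_{\mathrm{geo}} \;=\; \sum_{i=1}^{n} \gamma^{\,n-i}
    \Big( \lVert \tau_i \rVert + \lVert \phi_i \rVert \Big),
    \qquad (\tau_i, \phi_i) = \log\!\big( \hat{G}_i \, \Delta P^{-1} \big)

where \Delta P is the ground-truth relative pose on each graph edge, \hat{G}_i is the i-th estimate (scale-aligned when do_scale is set), and the Sim3 branch adds a small 0.05-weighted scale term; residual_loss and flow_loss reuse the same \gamma^{\,n-i} schedule.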
thirdparty/DROID-SLAM/droid_slam/geom/projective_ops.py ADDED
@@ -0,0 +1,139 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from lietorch import SE3, Sim3
5
+
6
+ MIN_DEPTH = 0.2
7
+
8
+ def extract_intrinsics(intrinsics):
9
+ return intrinsics[...,None,None,:].unbind(dim=-1)
10
+
11
+ def coords_grid(ht, wd, **kwargs):
12
+ y, x = torch.meshgrid(
13
+ torch.arange(ht).to(**kwargs).float(),
14
+ torch.arange(wd).to(**kwargs).float(), indexing='ij')
15
+
16
+ return torch.stack([x, y], dim=-1)
17
+
18
+ def iproj(disps, intrinsics, jacobian=False):
19
+ """ pinhole camera inverse projection """
20
+ ht, wd = disps.shape[2:]
21
+ fx, fy, cx, cy = extract_intrinsics(intrinsics)
22
+
23
+ y, x = torch.meshgrid(
24
+ torch.arange(ht).to(disps.device).float(),
25
+ torch.arange(wd).to(disps.device).float(), indexing='ij')
26
+
27
+ i = torch.ones_like(disps)
28
+ X = (x - cx) / fx
29
+ Y = (y - cy) / fy
30
+ pts = torch.stack([X, Y, i, disps], dim=-1)
31
+
32
+ if jacobian:
33
+ J = torch.zeros_like(pts)
34
+ J[...,-1] = 1.0
35
+ return pts, J
36
+
37
+ return pts, None
38
+
39
+ def proj(Xs, intrinsics, jacobian=False, return_depth=False):
40
+ """ pinhole camera projection """
41
+ fx, fy, cx, cy = extract_intrinsics(intrinsics)
42
+ X, Y, Z, D = Xs.unbind(dim=-1)
43
+
44
+ Z = torch.where(Z < 0.5*MIN_DEPTH, torch.ones_like(Z), Z)
45
+ d = 1.0 / Z
46
+
47
+ x = fx * (X * d) + cx
48
+ y = fy * (Y * d) + cy
49
+ if return_depth:
50
+ coords = torch.stack([x, y, D*d], dim=-1)
51
+ else:
52
+ coords = torch.stack([x, y], dim=-1)
53
+
54
+ if jacobian:
55
+ B, N, H, W = d.shape
56
+ o = torch.zeros_like(d)
57
+ proj_jac = torch.stack([
58
+ fx*d, o, -fx*X*d*d, o,
59
+ o, fy*d, -fy*Y*d*d, o,
60
+ # o, o, -D*d*d, d,
61
+ ], dim=-1).view(B, N, H, W, 2, 4)
62
+
63
+ return coords, proj_jac
64
+
65
+ return coords, None
66
+
67
+ def actp(Gij, X0, jacobian=False):
68
+ """ action on point cloud """
69
+ X1 = Gij[:,:,None,None] * X0
70
+
71
+ if jacobian:
72
+ X, Y, Z, d = X1.unbind(dim=-1)
73
+ o = torch.zeros_like(d)
74
+ B, N, H, W = d.shape
75
+
76
+ if isinstance(Gij, SE3):
77
+ Ja = torch.stack([
78
+ d, o, o, o, Z, -Y,
79
+ o, d, o, -Z, o, X,
80
+ o, o, d, Y, -X, o,
81
+ o, o, o, o, o, o,
82
+ ], dim=-1).view(B, N, H, W, 4, 6)
83
+
84
+ elif isinstance(Gij, Sim3):
85
+ Ja = torch.stack([
86
+ d, o, o, o, Z, -Y, X,
87
+ o, d, o, -Z, o, X, Y,
88
+ o, o, d, Y, -X, o, Z,
89
+ o, o, o, o, o, o, o
90
+ ], dim=-1).view(B, N, H, W, 4, 7)
91
+
92
+ return X1, Ja
93
+
94
+ return X1, None
95
+
96
+ def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, return_depth=False):
97
+ """ map points from ii->jj """
98
+
99
+ # inverse project (pinhole)
100
+ X0, Jz = iproj(depths[:,ii], intrinsics[:,ii], jacobian=jacobian)
101
+
102
+ # transform
103
+ Gij = poses[:,jj] * poses[:,ii].inv()
104
+
105
+ Gij.data[:,ii==jj] = torch.as_tensor([-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device="cuda")
106
+ X1, Ja = actp(Gij, X0, jacobian=jacobian)
107
+
108
+ # project (pinhole)
109
+ x1, Jp = proj(X1, intrinsics[:,jj], jacobian=jacobian, return_depth=return_depth)
110
+
111
+ # exclude points too close to camera
112
+ valid = ((X1[...,2] > MIN_DEPTH) & (X0[...,2] > MIN_DEPTH)).float()
113
+ valid = valid.unsqueeze(-1)
114
+
115
+ if jacobian:
116
+ # Ji transforms according to dual adjoint
117
+ Jj = torch.matmul(Jp, Ja)
118
+ Ji = -Gij[:,:,None,None,None].adjT(Jj)
119
+
120
+ Jz = Gij[:,:,None,None] * Jz
121
+ Jz = torch.matmul(Jp, Jz.unsqueeze(-1))
122
+
123
+ return x1, valid, (Ji, Jj, Jz)
124
+
125
+ return x1, valid
126
+
127
+ def induced_flow(poses, disps, intrinsics, ii, jj):
128
+ """ optical flow induced by camera motion """
129
+
130
+ ht, wd = disps.shape[2:]
131
+ y, x = torch.meshgrid(
132
+ torch.arange(ht).to(disps.device).float(),
133
+ torch.arange(wd).to(disps.device).float(), indexing='ij')
134
+
135
+ coords0 = torch.stack([x, y], dim=-1)
136
+ coords1, valid = projective_transform(poses, disps, intrinsics, ii, jj, False)
137
+
138
+ return coords1[...,:2] - coords0, valid
139
+
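The two primitives above are the standard pinhole model parameterized by inverse depth (f_x, f_y, c_x, c_y as in the code):

    \pi^{-1}(x, y, d) = \Big( \tfrac{x - c_x}{f_x},\; \tfrac{y - c_y}{f_y},\; 1,\; d \Big),
    \qquad
    \pi(X, Y, Z, D) = \Big( f_x \tfrac{X}{Z} + c_x,\; f_y \tfrac{Y}{Z} + c_y \Big)

and projective_transform chains them with the relative pose G_{ij} = G_j G_i^{-1}, i.e. \mathbf{p}_j = \pi\big( G_{ij}\, \pi^{-1}(\mathbf{p}_i, d_i) \big), which is also where the analytic Jacobians J_i, J_j, J_z are taken.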
thirdparty/DROID-SLAM/droid_slam/logger.py ADDED
@@ -0,0 +1,54 @@
1
+
2
+ import torch
3
+ from torch.utils.tensorboard import SummaryWriter
4
+
5
+
6
+ SUM_FREQ = 100
7
+
8
+ class Logger:
9
+ def __init__(self, name, scheduler):
10
+ self.total_steps = 0
11
+ self.running_loss = {}
12
+ self.writer = None
13
+ self.name = name
14
+ self.scheduler = scheduler
15
+
16
+ def _print_training_status(self):
17
+ if self.writer is None:
18
+ self.writer = SummaryWriter('runs/%s' % self.name)
19
+ print([k for k in self.running_loss])
20
+
21
+ lr = self.scheduler.get_lr().pop()
22
+ metrics_data = [self.running_loss[k]/SUM_FREQ for k in self.running_loss.keys()]
23
+ training_str = "[{:6d}, {:10.7f}] ".format(self.total_steps+1, lr)
24
+ metrics_str = ("{:10.4f}, "*len(metrics_data)).format(*metrics_data)
25
+
26
+ # print the training status
27
+ print(training_str + metrics_str)
28
+
29
+ for key in self.running_loss:
30
+ val = self.running_loss[key] / SUM_FREQ
31
+ self.writer.add_scalar(key, val, self.total_steps)
32
+ self.running_loss[key] = 0.0
33
+
34
+ def push(self, metrics):
35
+
36
+ for key in metrics:
37
+ if key not in self.running_loss:
38
+ self.running_loss[key] = 0.0
39
+
40
+ self.running_loss[key] += metrics[key]
41
+
42
+ if self.total_steps % SUM_FREQ == SUM_FREQ-1:
43
+ self._print_training_status()
44
+ self.running_loss = {}
45
+
46
+ self.total_steps += 1
47
+
48
+ def write_dict(self, results):
49
+ for key in results:
50
+ self.writer.add_scalar(key, results[key], self.total_steps)
51
+
52
+ def close(self):
53
+ self.writer.close()
54
+
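Note: a minimal usage sketch of the Logger above, assuming tensorboard is installed and this runs next to the class definition; the tiny optimizer/scheduler pair is a hypothetical stand-in (anything exposing get_lr() works).

import torch

param = torch.nn.Parameter(torch.zeros(1))                       # dummy parameter, just to build a scheduler
optimizer = torch.optim.Adam([param], lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000)

logger = Logger("example_run", scheduler)
for step in range(250):
    logger.push({"loss": 0.5, "flow_error": 1.2})                # averaged and printed every SUM_FREQ steps
logger.close()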
thirdparty/DROID-SLAM/droid_slam/modules/__init__.py ADDED
File without changes
thirdparty/DROID-SLAM/droid_slam/modules/clipping.py ADDED
@@ -0,0 +1,24 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ GRAD_CLIP = .01
6
+
7
+ class GradClip(torch.autograd.Function):
8
+ @staticmethod
9
+ def forward(ctx, x):
10
+ return x
11
+
12
+ @staticmethod
13
+ def backward(ctx, grad_x):
14
+ o = torch.zeros_like(grad_x)
15
+ grad_x = torch.where(grad_x.abs()>GRAD_CLIP, o, grad_x)
16
+ grad_x = torch.where(torch.isnan(grad_x), o, grad_x)
17
+ return grad_x
18
+
19
+ class GradientClip(nn.Module):
20
+ def __init__(self):
21
+ super(GradientClip, self).__init__()
22
+
23
+ def forward(self, x):
24
+ return GradClip.apply(x)
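Note: the forward pass is an identity; only the backward pass changes. A quick check of the behaviour (run alongside the module above): gradient entries whose magnitude exceeds GRAD_CLIP, and NaN entries, are zeroed.

import torch

x = torch.ones(3, requires_grad=True)
y = GradientClip()(x)
y.backward(torch.tensor([0.005, 0.5, float('nan')]))
print(x.grad)   # tensor([0.0050, 0.0000, 0.0000])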
thirdparty/DROID-SLAM/droid_slam/modules/corr.py ADDED
@@ -0,0 +1,140 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ import droid_backends
5
+
6
+ class CorrSampler(torch.autograd.Function):
7
+
8
+ @staticmethod
9
+ def forward(ctx, volume, coords, radius):
10
+ ctx.save_for_backward(volume,coords)
11
+ ctx.radius = radius
12
+ corr, = droid_backends.corr_index_forward(volume, coords, radius)
13
+ return corr
14
+
15
+ @staticmethod
16
+ def backward(ctx, grad_output):
17
+ volume, coords = ctx.saved_tensors
18
+ grad_output = grad_output.contiguous()
19
+ grad_volume, = droid_backends.corr_index_backward(volume, coords, grad_output, ctx.radius)
20
+ return grad_volume, None, None
21
+
22
+
23
+ class CorrBlock:
24
+ def __init__(self, fmap1, fmap2, num_levels=4, radius=3):
25
+ self.num_levels = num_levels
26
+ self.radius = radius
27
+ self.corr_pyramid = []
28
+
29
+ # all pairs correlation
30
+ corr = CorrBlock.corr(fmap1, fmap2)
31
+
32
+ batch, num, h1, w1, h2, w2 = corr.shape
33
+ corr = corr.reshape(batch*num*h1*w1, 1, h2, w2)
34
+
35
+ for i in range(self.num_levels):
36
+ self.corr_pyramid.append(
37
+ corr.view(batch*num, h1, w1, h2//2**i, w2//2**i))
38
+ corr = F.avg_pool2d(corr, 2, stride=2)
39
+
40
+ def __call__(self, coords):
41
+ out_pyramid = []
42
+ batch, num, ht, wd, _ = coords.shape
43
+ coords = coords.permute(0,1,4,2,3)
44
+ coords = coords.contiguous().view(batch*num, 2, ht, wd)
45
+
46
+ for i in range(self.num_levels):
47
+ corr = CorrSampler.apply(self.corr_pyramid[i], coords/2**i, self.radius)
48
+ out_pyramid.append(corr.view(batch, num, -1, ht, wd))
49
+
50
+ return torch.cat(out_pyramid, dim=2)
51
+
52
+ def cat(self, other):
53
+ for i in range(self.num_levels):
54
+ self.corr_pyramid[i] = torch.cat([self.corr_pyramid[i], other.corr_pyramid[i]], 0)
55
+ return self
56
+
57
+ def __getitem__(self, index):
58
+ for i in range(self.num_levels):
59
+ self.corr_pyramid[i] = self.corr_pyramid[i][index]
60
+ return self
61
+
62
+
63
+ @staticmethod
64
+ def corr(fmap1, fmap2):
65
+ """ all-pairs correlation """
66
+ batch, num, dim, ht, wd = fmap1.shape
67
+ fmap1 = fmap1.reshape(batch*num, dim, ht*wd) / 4.0
68
+ fmap2 = fmap2.reshape(batch*num, dim, ht*wd) / 4.0
69
+
70
+ corr = torch.matmul(fmap1.transpose(1,2), fmap2)
71
+ return corr.view(batch, num, ht, wd, ht, wd)
72
+
73
+
74
+ class CorrLayer(torch.autograd.Function):
75
+ @staticmethod
76
+ def forward(ctx, fmap1, fmap2, coords, r):
77
+ ctx.r = r
78
+ ctx.save_for_backward(fmap1, fmap2, coords)
79
+ corr, = droid_backends.altcorr_forward(fmap1, fmap2, coords, ctx.r)
80
+ return corr
81
+
82
+ @staticmethod
83
+ def backward(ctx, grad_corr):
84
+ fmap1, fmap2, coords = ctx.saved_tensors
85
+ grad_corr = grad_corr.contiguous()
86
+ fmap1_grad, fmap2_grad, coords_grad = \
87
+ droid_backends.altcorr_backward(fmap1, fmap2, coords, grad_corr, ctx.r)
88
+ return fmap1_grad, fmap2_grad, coords_grad, None
89
+
90
+
91
+ class AltCorrBlock:
92
+ def __init__(self, fmaps, num_levels=4, radius=3):
93
+ self.num_levels = num_levels
94
+ self.radius = radius
95
+
96
+ B, N, C, H, W = fmaps.shape
97
+ fmaps = fmaps.view(B*N, C, H, W) / 4.0
98
+
99
+ self.pyramid = []
100
+ for i in range(self.num_levels):
101
+ sz = (B, N, H//2**i, W//2**i, C)
102
+ fmap_lvl = fmaps.permute(0, 2, 3, 1).contiguous()
103
+ self.pyramid.append(fmap_lvl.view(*sz))
104
+ fmaps = F.avg_pool2d(fmaps, 2, stride=2)
105
+
106
+ def corr_fn(self, coords, ii, jj):
107
+ B, N, H, W, S, _ = coords.shape
108
+ coords = coords.permute(0, 1, 4, 2, 3, 5)
109
+
110
+ corr_list = []
111
+ for i in range(self.num_levels):
112
+ r = self.radius
113
+ fmap1_i = self.pyramid[0][:, ii]
114
+ fmap2_i = self.pyramid[i][:, jj]
115
+
116
+ coords_i = (coords / 2**i).reshape(B*N, S, H, W, 2).contiguous()
117
+ fmap1_i = fmap1_i.reshape((B*N,) + fmap1_i.shape[2:])
118
+ fmap2_i = fmap2_i.reshape((B*N,) + fmap2_i.shape[2:])
119
+
120
+ corr = CorrLayer.apply(fmap1_i.float(), fmap2_i.float(), coords_i, self.radius)
121
+ corr = corr.view(B, N, S, -1, H, W).permute(0, 1, 3, 4, 5, 2)
122
+ corr_list.append(corr)
123
+
124
+ corr = torch.cat(corr_list, dim=2)
125
+ return corr
126
+
127
+
128
+ def __call__(self, coords, ii, jj):
129
+ squeeze_output = False
130
+ if len(coords.shape) == 5:
131
+ coords = coords.unsqueeze(dim=-2)
132
+ squeeze_output = True
133
+
134
+ corr = self.corr_fn(coords, ii, jj)
135
+
136
+ if squeeze_output:
137
+ corr = corr.squeeze(dim=-1)
138
+
139
+ return corr.contiguous()
140
+
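Note: a shape check for the all-pairs correlation with dummy 1/8-resolution feature maps (sizes are illustrative). Building the volume is plain PyTorch; indexing the pyramid via CorrSampler/CorrLayer needs the compiled droid_backends CUDA extension, which this module also imports at load time.

import torch

fmap1 = torch.randn(1, 2, 128, 30, 40)      # [batch, num_edges, dim, h, w]
fmap2 = torch.randn(1, 2, 128, 30, 40)

volume = CorrBlock.corr(fmap1, fmap2)       # dot products between every pair of pixels
print(volume.shape)                         # torch.Size([1, 2, 30, 40, 30, 40])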
thirdparty/DROID-SLAM/droid_slam/modules/extractor.py ADDED
@@ -0,0 +1,198 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class ResidualBlock(nn.Module):
7
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
8
+ super(ResidualBlock, self).__init__()
9
+
10
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
11
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
12
+ self.relu = nn.ReLU(inplace=True)
13
+
14
+ num_groups = planes // 8
15
+
16
+ if norm_fn == 'group':
17
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
18
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
19
+ if not stride == 1:
20
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
21
+
22
+ elif norm_fn == 'batch':
23
+ self.norm1 = nn.BatchNorm2d(planes)
24
+ self.norm2 = nn.BatchNorm2d(planes)
25
+ if not stride == 1:
26
+ self.norm3 = nn.BatchNorm2d(planes)
27
+
28
+ elif norm_fn == 'instance':
29
+ self.norm1 = nn.InstanceNorm2d(planes)
30
+ self.norm2 = nn.InstanceNorm2d(planes)
31
+ if not stride == 1:
32
+ self.norm3 = nn.InstanceNorm2d(planes)
33
+
34
+ elif norm_fn == 'none':
35
+ self.norm1 = nn.Sequential()
36
+ self.norm2 = nn.Sequential()
37
+ if not stride == 1:
38
+ self.norm3 = nn.Sequential()
39
+
40
+ if stride == 1:
41
+ self.downsample = None
42
+
43
+ else:
44
+ self.downsample = nn.Sequential(
45
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
46
+
47
+ def forward(self, x):
48
+ y = x
49
+ y = self.relu(self.norm1(self.conv1(y)))
50
+ y = self.relu(self.norm2(self.conv2(y)))
51
+
52
+ if self.downsample is not None:
53
+ x = self.downsample(x)
54
+
55
+ return self.relu(x+y)
56
+
57
+
58
+ class BottleneckBlock(nn.Module):
59
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
60
+ super(BottleneckBlock, self).__init__()
61
+
62
+ self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
63
+ self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
64
+ self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
65
+ self.relu = nn.ReLU(inplace=True)
66
+
67
+ num_groups = planes // 8
68
+
69
+ if norm_fn == 'group':
70
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
71
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
72
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
73
+ if not stride == 1:
74
+ self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
75
+
76
+ elif norm_fn == 'batch':
77
+ self.norm1 = nn.BatchNorm2d(planes//4)
78
+ self.norm2 = nn.BatchNorm2d(planes//4)
79
+ self.norm3 = nn.BatchNorm2d(planes)
80
+ if not stride == 1:
81
+ self.norm4 = nn.BatchNorm2d(planes)
82
+
83
+ elif norm_fn == 'instance':
84
+ self.norm1 = nn.InstanceNorm2d(planes//4)
85
+ self.norm2 = nn.InstanceNorm2d(planes//4)
86
+ self.norm3 = nn.InstanceNorm2d(planes)
87
+ if not stride == 1:
88
+ self.norm4 = nn.InstanceNorm2d(planes)
89
+
90
+ elif norm_fn == 'none':
91
+ self.norm1 = nn.Sequential()
92
+ self.norm2 = nn.Sequential()
93
+ self.norm3 = nn.Sequential()
94
+ if not stride == 1:
95
+ self.norm4 = nn.Sequential()
96
+
97
+ if stride == 1:
98
+ self.downsample = None
99
+
100
+ else:
101
+ self.downsample = nn.Sequential(
102
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
103
+
104
+ def forward(self, x):
105
+ y = x
106
+ y = self.relu(self.norm1(self.conv1(y)))
107
+ y = self.relu(self.norm2(self.conv2(y)))
108
+ y = self.relu(self.norm3(self.conv3(y)))
109
+
110
+ if self.downsample is not None:
111
+ x = self.downsample(x)
112
+
113
+ return self.relu(x+y)
114
+
115
+
116
+ DIM=32
117
+
118
+ class BasicEncoder(nn.Module):
119
+ def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0, multidim=False):
120
+ super(BasicEncoder, self).__init__()
121
+ self.norm_fn = norm_fn
122
+ self.multidim = multidim
123
+
124
+ if self.norm_fn == 'group':
125
+ self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM)
126
+
127
+ elif self.norm_fn == 'batch':
128
+ self.norm1 = nn.BatchNorm2d(DIM)
129
+
130
+ elif self.norm_fn == 'instance':
131
+ self.norm1 = nn.InstanceNorm2d(DIM)
132
+
133
+ elif self.norm_fn == 'none':
134
+ self.norm1 = nn.Sequential()
135
+
136
+ self.conv1 = nn.Conv2d(3, DIM, kernel_size=7, stride=2, padding=3)
137
+ self.relu1 = nn.ReLU(inplace=True)
138
+
139
+ self.in_planes = DIM
140
+ self.layer1 = self._make_layer(DIM, stride=1)
141
+ self.layer2 = self._make_layer(2*DIM, stride=2)
142
+ self.layer3 = self._make_layer(4*DIM, stride=2)
143
+
144
+ # output convolution
145
+ self.conv2 = nn.Conv2d(4*DIM, output_dim, kernel_size=1)
146
+
147
+ if self.multidim:
148
+ self.layer4 = self._make_layer(256, stride=2)
149
+ self.layer5 = self._make_layer(512, stride=2)
150
+
151
+ self.in_planes = 256
152
+ self.layer6 = self._make_layer(256, stride=1)
153
+
154
+ self.in_planes = 128
155
+ self.layer7 = self._make_layer(128, stride=1)
156
+
157
+ self.up1 = nn.Conv2d(512, 256, 1)
158
+ self.up2 = nn.Conv2d(256, 128, 1)
159
+ self.conv3 = nn.Conv2d(128, output_dim, kernel_size=1)
160
+
161
+ if dropout > 0:
162
+ self.dropout = nn.Dropout2d(p=dropout)
163
+ else:
164
+ self.dropout = None
165
+
166
+ for m in self.modules():
167
+ if isinstance(m, nn.Conv2d):
168
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
169
+ elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
170
+ if m.weight is not None:
171
+ nn.init.constant_(m.weight, 1)
172
+ if m.bias is not None:
173
+ nn.init.constant_(m.bias, 0)
174
+
175
+ def _make_layer(self, dim, stride=1):
176
+ layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
177
+ layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
178
+ layers = (layer1, layer2)
179
+
180
+ self.in_planes = dim
181
+ return nn.Sequential(*layers)
182
+
183
+ def forward(self, x):
184
+ b, n, c1, h1, w1 = x.shape
185
+ x = x.view(b*n, c1, h1, w1)
186
+
187
+ x = self.conv1(x)
188
+ x = self.norm1(x)
189
+ x = self.relu1(x)
190
+
191
+ x = self.layer1(x)
192
+ x = self.layer2(x)
193
+ x = self.layer3(x)
194
+
195
+ x = self.conv2(x)
196
+
197
+ _, c2, h2, w2 = x.shape
198
+ return x.view(b, n, c2, h2, w2)
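Note: three stride-2 stages take the input down to 1/8 resolution, which is the grid the correlation volume and GRU operate on. A shape check with an illustrative 480x640 input:

import torch

encoder = BasicEncoder(output_dim=128, norm_fn='instance')
frames = torch.randn(1, 2, 3, 480, 640)      # [batch, num_frames, rgb, H, W]
feats = encoder(frames)
print(feats.shape)                           # torch.Size([1, 2, 128, 60, 80])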
thirdparty/DROID-SLAM/droid_slam/modules/gru.py ADDED
@@ -0,0 +1,34 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
+ class ConvGRU(nn.Module):
6
+ def __init__(self, h_planes=128, i_planes=128):
7
+ super(ConvGRU, self).__init__()
8
+ self.do_checkpoint = False
9
+ self.convz = nn.Conv2d(h_planes+i_planes, h_planes, 3, padding=1)
10
+ self.convr = nn.Conv2d(h_planes+i_planes, h_planes, 3, padding=1)
11
+ self.convq = nn.Conv2d(h_planes+i_planes, h_planes, 3, padding=1)
12
+
13
+ self.w = nn.Conv2d(h_planes, h_planes, 1, padding=0)
14
+
15
+ self.convz_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
16
+ self.convr_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
17
+ self.convq_glo = nn.Conv2d(h_planes, h_planes, 1, padding=0)
18
+
19
+ def forward(self, net, *inputs):
20
+ inp = torch.cat(inputs, dim=1)
21
+ net_inp = torch.cat([net, inp], dim=1)
22
+
23
+ b, c, h, w = net.shape
24
+ glo = torch.sigmoid(self.w(net)) * net
25
+ glo = glo.view(b, c, h*w).mean(-1).view(b, c, 1, 1)
26
+
27
+ z = torch.sigmoid(self.convz(net_inp) + self.convz_glo(glo))
28
+ r = torch.sigmoid(self.convr(net_inp) + self.convr_glo(glo))
29
+ q = torch.tanh(self.convq(torch.cat([r*net, inp], dim=1)) + self.convq_glo(glo))
30
+
31
+ net = (1-z) * net + z * q
32
+ return net
33
+
34
+
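Note: besides the usual convolutional z/r/q gates, this GRU adds a global context term: the hidden state is gated by self.w, pooled to a [B, C, 1, 1] vector and fed through 1x1 convolutions into each gate. A shape-only sketch with hypothetical sizes; the extra inputs are concatenated along channels, so i_planes must match their total width.

import torch

gru = ConvGRU(h_planes=128, i_planes=128 + 64)
net = torch.randn(2, 128, 30, 40)            # hidden state
inp = torch.randn(2, 128, 30, 40)            # context features
corr = torch.randn(2, 64, 30, 40)            # correlation features (hypothetical width)
net = gru(net, inp, corr)
print(net.shape)                             # torch.Size([2, 128, 30, 40])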
thirdparty/DROID-SLAM/droid_slam/motion_filter.py ADDED
@@ -0,0 +1,92 @@
1
+ import cv2
2
+ import torch
3
+ import lietorch
4
+
5
+ from collections import OrderedDict
6
+ from droid_net import DroidNet
7
+
8
+ import geom.projective_ops as pops
9
+ from modules.corr import CorrBlock
10
+
11
+
12
+ class MotionFilter:
13
+ """ This class is used to filter incoming frames and extract features """
14
+
15
+ def __init__(self, net, video, thresh=2.5, device="cuda:0"):
16
+
17
+ # split net modules
18
+ self.cnet = net.cnet
19
+ self.fnet = net.fnet
20
+ self.update = net.update
21
+
22
+ self.video = video
23
+ self.thresh = thresh
24
+ self.device = device
25
+
26
+ self.count = 0
27
+
28
+ # mean, std for image normalization
29
+ self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
30
+ self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]
31
+
32
+ @torch.cuda.amp.autocast(enabled=True)
33
+ def __context_encoder(self, image):
34
+ """ context features """
35
+ net, inp = self.cnet(image).split([128,128], dim=2)
36
+ return net.tanh().squeeze(0), inp.relu().squeeze(0)
37
+
38
+ @torch.cuda.amp.autocast(enabled=True)
39
+ def __feature_encoder(self, image):
40
+ """ features for correlation volume """
41
+ return self.fnet(image).squeeze(0)
42
+
43
+ @torch.cuda.amp.autocast(enabled=True)
44
+ @torch.no_grad()
45
+ def track(self, tstamp, image, depth=None, intrinsics=None, mask=None):
46
+ """ main update operation - run on every frame in video """
47
+
48
+ Id = lietorch.SE3.Identity(1,).data.squeeze()
49
+ ht = image.shape[-2] // 8
50
+ wd = image.shape[-1] // 8
51
+
52
+ # normalize images
53
+ inputs = image[None, :, [2,1,0]].to(self.device) / 255.0
54
+ inputs = inputs.sub_(self.MEAN).div_(self.STDV)
55
+
56
+ # extract features
57
+ gmap = self.__feature_encoder(inputs) # [1, 128, gh, gw]
58
+ if mask is None:
59
+ mask = torch.zeros([gmap.shape[-2], gmap.shape[-1]]).to(gmap)
60
+ # if mask is not None:
61
+ # # bias = self.fnet.conv2.bias.detach().clone().half()
62
+ # # gmap[:,:,mask>0.0] = bias[:, None].repeat(1, (mask>0.0).sum())
63
+ # gmap[:,:,mask>0.0] = 0
64
+
65
+ ### always add first frame to the depth video ###
66
+ if self.video.counter.value == 0:
67
+ net, inp = self.__context_encoder(inputs[:,[0]])
68
+ self.net, self.inp, self.fmap = net, inp, gmap
69
+ self.video.append(tstamp, image[0], Id, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0], mask)
70
+ # mask: torch.Size([64, 48])
71
+ # gmap: torch.Size([1, 128, 64, 48])
72
+ # net: torch.Size([1, 128, 64, 48])
73
+ # inp: torch.Size([1, 128, 64, 48])
74
+
75
+ ### only add new frame if there is enough motion ###
76
+ else:
77
+ # index correlation volume
78
+ coords0 = pops.coords_grid(ht, wd, device=self.device)[None,None]
79
+ corr = CorrBlock(self.fmap[None,[0]], gmap[None,[0]])(coords0)
80
+
81
+ # approximate flow magnitude using 1 update iteration
82
+ _, delta, weight = self.update(self.net[None], self.inp[None], corr)
83
+
84
+ # check motion magnitude / add new frame to video
85
+ if delta.norm(dim=-1).mean().item() > self.thresh:
86
+ self.count = 0
87
+ net, inp = self.__context_encoder(inputs[:,[0]])
88
+ self.net, self.inp, self.fmap = net, inp, gmap
89
+ self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0], mask)
90
+
91
+ else:
92
+ self.count += 1
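Note: stripped of the feature extraction, the keyframe decision above reduces to comparing the mean magnitude of the predicted flow update against self.thresh. A stand-alone sketch of just that check:

import torch

def is_new_keyframe(delta, thresh=2.5):
    """delta: predicted flow update [..., H, W, 2]; keep the frame if mean flow magnitude exceeds thresh."""
    return delta.norm(dim=-1).mean().item() > thresh

print(is_new_keyframe(torch.full((1, 1, 30, 40, 2), 3.0)))   # True  -> becomes a keyframe
print(is_new_keyframe(torch.zeros(1, 1, 30, 40, 2)))         # False -> frame is skipped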
thirdparty/DROID-SLAM/droid_slam/trajectory_filler.py ADDED
@@ -0,0 +1,112 @@
1
+ import cv2
2
+ import torch
3
+ import lietorch
4
+
5
+ from lietorch import SE3
6
+ from collections import OrderedDict
7
+ from factor_graph import FactorGraph
8
+ from droid_net import DroidNet
9
+ import geom.projective_ops as pops
10
+
11
+
12
+ class PoseTrajectoryFiller:
13
+ """ This class is used to fill in non-keyframe poses """
14
+
15
+ def __init__(self, net, video, device="cuda:0"):
16
+
17
+ # split net modules
18
+ self.cnet = net.cnet
19
+ self.fnet = net.fnet
20
+ self.update = net.update
21
+
22
+ self.count = 0
23
+ self.video = video
24
+ self.device = device
25
+
26
+ # mean, std for image normalization
27
+ self.MEAN = torch.as_tensor([0.485, 0.456, 0.406], device=self.device)[:, None, None]
28
+ self.STDV = torch.as_tensor([0.229, 0.224, 0.225], device=self.device)[:, None, None]
29
+
30
+ @torch.cuda.amp.autocast(enabled=True)
31
+ def __feature_encoder(self, image):
32
+ """ features for correlation volume """
33
+ return self.fnet(image)
34
+
35
+ def __fill(self, tstamps, images, intrinsics):
36
+ """ fill operator """
37
+
38
+ tt = torch.as_tensor(tstamps, device="cuda")
39
+ images = torch.stack(images, 0)
40
+ intrinsics = torch.stack(intrinsics, 0)
41
+ inputs = images[:,:,[2,1,0]].to(self.device) / 255.0
42
+
43
+ ### linear pose interpolation ###
44
+ N = self.video.counter.value # number of keyframes
45
+ M = len(tstamps) # number of frames to fill in this chunk (at most 16)
46
+
47
+ ts = self.video.tstamp[:N] # tstamp of keyframes
48
+ Ps = SE3(self.video.poses[:N]) # pose of keyframes
49
+
50
+ t0 = torch.as_tensor([ts[ts<=t].shape[0] - 1 for t in tstamps])
51
+ t1 = torch.where(t0<N-1, t0+1, t0)
52
+
53
+ dt = ts[t1] - ts[t0] + 1e-3
54
+ dP = Ps[t1] * Ps[t0].inv()
55
+
56
+ v = dP.log() / dt.unsqueeze(-1)
57
+ w = v * (tt - ts[t0]).unsqueeze(-1)
58
+ Gs = SE3.exp(w) * Ps[t0]
59
+
60
+ # extract features (no need for context features)
61
+ inputs = inputs.sub_(self.MEAN).div_(self.STDV)
62
+ fmap = self.__feature_encoder(inputs)
63
+
64
+ self.video.counter.value += M
65
+ self.video[N:N+M] = (tt, images[:,0], Gs.data, 1, None, intrinsics / 8.0, fmap)
66
+ # print('t0:', t0, 't1:', t1)
67
+ # print('tt:', tt.shape, '\n', tt)
68
+
69
+ # self.video.append(tstamp, image[0], Id, 1.0, depth, intrinsics / 8.0, gmap, net[0,0], inp[0,0], mask)
70
+ # self.video.append(tstamp, image[0], None, None, depth, intrinsics / 8.0, gmap, net[0], inp[0], mask)
71
+
72
+ graph = FactorGraph(self.video, self.update)
73
+ graph.add_factors(t0.cuda(), torch.arange(N, N+M).cuda())
74
+ graph.add_factors(t1.cuda(), torch.arange(N, N+M).cuda())
75
+ # print('graph.ii:', graph.ii)
76
+ # print('graph.jj:', graph.jj)
77
+ # print()
78
+
79
+ for itr in range(6):
80
+ graph.update(N, N+M, motion_only=True)
81
+
82
+ Gs = SE3(self.video.poses[N:N+M].clone())
83
+ self.video.counter.value -= M
84
+
85
+ return [ Gs ]
86
+
87
+ @torch.no_grad()
88
+ def __call__(self, image_stream):
89
+ """ fill in poses of non-keyframe images """
90
+
91
+ # store all camera poses
92
+ pose_list = []
93
+
94
+ tstamps = []
95
+ images = []
96
+ intrinsics = []
97
+
98
+ for (tstamp, image, intrinsic) in image_stream:
99
+ tstamps.append(tstamp)
100
+ images.append(image)
101
+ intrinsics.append(intrinsic)
102
+
103
+ if len(tstamps) == 16:
104
+ pose_list += self.__fill(tstamps, images, intrinsics)
105
+ tstamps, images, intrinsics = [], [], []
106
+
107
+ if len(tstamps) > 0:
108
+ pose_list += self.__fill(tstamps, images, intrinsics)
109
+
110
+ # stitch pose segments together
111
+ return lietorch.cat(pose_list, 0)
112
+
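Note: the "linear pose interpolation" above is constant-velocity interpolation in the SE3 tangent space (log, scale, exp). A minimal sketch with two hand-written keyframe poses, assuming the bundled lietorch extension is built; poses are 7-vectors (tx, ty, tz, qx, qy, qz, qw).

import torch
from lietorch import SE3

poses = SE3(torch.tensor([[0.0, 0, 0, 0, 0, 0, 1.0],     # keyframe at t=0
                          [1.0, 0, 0, 0, 0, 0, 1.0]]))   # keyframe at t=10, 1m along x
t0, t1, t = 0.0, 10.0, 2.5
i0, i1 = torch.tensor([0]), torch.tensor([1])

dP = poses[i1] * poses[i0].inv()                          # relative motion between keyframes
v = dP.log() / (t1 - t0)                                  # constant-velocity assumption
G = SE3.exp(v * (t - t0)) * poses[i0]                     # pose of the in-between frame
print(G.data)                                             # translation ~0.25m along x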
thirdparty/DROID-SLAM/droid_slam/vis_headless.py ADDED
@@ -0,0 +1,185 @@
1
+ import torch
2
+ import cv2
3
+ import lietorch
4
+ import droid_backends
5
+ import time
6
+ import argparse
7
+ import numpy as np
8
+
9
+ # import os
10
+ # os.environ['PYOPENGL_PLATFORM'] = 'egl'
11
+ #os.environ['PYOPENGL_PLATFORM'] = 'osmesa'
12
+ import open3d as o3d
13
+
14
+ # o3d.visualization.webrtc_server.enable_webrtc()
15
+
16
+ from lietorch import SE3
17
+ import geom.projective_ops as pops
18
+
19
+
20
+ CAM_POINTS = np.array([
21
+ [ 0, 0, 0],
22
+ [-1, -1, 1.5],
23
+ [ 1, -1, 1.5],
24
+ [ 1, 1, 1.5],
25
+ [-1, 1, 1.5],
26
+ [-0.5, 1, 1.5],
27
+ [ 0.5, 1, 1.5],
28
+ [ 0, 1.2, 1.5]])
29
+
30
+ CAM_LINES = np.array([
31
+ [1,2], [2,3], [3,4], [4,1], [1,0], [0,2], [3,0], [0,4], [5,7], [7,6]])
32
+
33
+ def white_balance(img):
34
+ # from https://stackoverflow.com/questions/46390779/automatic-white-balancing-with-grayworld-assumption
35
+ result = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
36
+ avg_a = np.average(result[:, :, 1])
37
+ avg_b = np.average(result[:, :, 2])
38
+ result[:, :, 1] = result[:, :, 1] - ((avg_a - 128) * (result[:, :, 0] / 255.0) * 1.1)
39
+ result[:, :, 2] = result[:, :, 2] - ((avg_b - 128) * (result[:, :, 0] / 255.0) * 1.1)
40
+ result = cv2.cvtColor(result, cv2.COLOR_LAB2BGR)
41
+ return result
42
+
43
+
44
+ def create_camera_actor(g, scale=0.05):
45
+ """ build open3d camera polydata """
46
+ camera_actor = o3d.geometry.LineSet(
47
+ points=o3d.utility.Vector3dVector(scale * CAM_POINTS),
48
+ lines=o3d.utility.Vector2iVector(CAM_LINES))
49
+
50
+ color = (g * 1.0, 0.5 * (1-g), 0.9 * (1-g))
51
+ camera_actor.paint_uniform_color(color)
52
+ return camera_actor
53
+
54
+
55
+ def create_point_actor(points, colors):
56
+ """ open3d point cloud from numpy array """
57
+ point_cloud = o3d.geometry.PointCloud()
58
+ point_cloud.points = o3d.utility.Vector3dVector(points)
59
+ point_cloud.colors = o3d.utility.Vector3dVector(colors)
60
+ return point_cloud
61
+
62
+
63
+ def droid_visualization(video, save_path, device="cuda:0"):
64
+ """ DROID visualization frontend """
65
+
66
+ torch.cuda.set_device(0)
67
+ droid_visualization.video = video
68
+ droid_visualization.cameras = {}
69
+ droid_visualization.points = {}
70
+ droid_visualization.warmup = 8
71
+ droid_visualization.scale = 1.0
72
+ droid_visualization.ix = 0
73
+ print("headless droid_visualization")
74
+
75
+
76
+ droid_visualization.filter_thresh = 0.3 #0.005
77
+
78
+ def increase_filter(vis):
79
+ droid_visualization.filter_thresh *= 2
80
+ with droid_visualization.video.get_lock():
81
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
82
+
83
+ def decrease_filter(vis):
84
+ droid_visualization.filter_thresh *= 0.5
85
+ with droid_visualization.video.get_lock():
86
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
87
+
88
+ def animation_callback(vis):
89
+ cam = vis.get_view_control().convert_to_pinhole_camera_parameters()
90
+
91
+ with torch.no_grad():
92
+
93
+ with video.get_lock():
94
+ t = video.counter.value
95
+ dirty_index, = torch.where(video.dirty.clone())
96
+ dirty_index = dirty_index
97
+
98
+ if len(dirty_index) == 0:
99
+ return
100
+
101
+ video.dirty[dirty_index] = False
102
+
103
+ # convert poses to 4x4 matrix
104
+ poses = torch.index_select(video.poses, 0, dirty_index)
105
+ disps = torch.index_select(video.disps, 0, dirty_index)
106
+ Ps = SE3(poses).inv().matrix().cpu().numpy()
107
+
108
+ images = torch.index_select(video.images, 0, dirty_index)
109
+ images = images.cpu()[:,[2,1,0],3::8,3::8].permute(0,2,3,1) / 255.0
110
+ points = droid_backends.iproj(SE3(poses).inv().data, disps, video.intrinsics[0]).cpu()
111
+
112
+ thresh = droid_visualization.filter_thresh * torch.ones_like(disps.mean(dim=[1,2]))
113
+
114
+ count = droid_backends.depth_filter(
115
+ video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)
116
+
117
+ count = count.cpu()
118
+ disps = disps.cpu()
119
+ masks = ((count >= 2) & (disps > .5*disps.mean(dim=[1,2], keepdim=True)))
120
+
121
+ for i in range(len(dirty_index)):
122
+ pose = Ps[i]
123
+ ix = dirty_index[i].item()
124
+
125
+ if ix in droid_visualization.cameras:
126
+ vis.remove_geometry(droid_visualization.cameras[ix])
127
+ del droid_visualization.cameras[ix]
128
+
129
+ if ix in droid_visualization.points:
130
+ vis.remove_geometry(droid_visualization.points[ix])
131
+ del droid_visualization.points[ix]
132
+
133
+ ### add camera actor ###
134
+ cam_actor = create_camera_actor(True)
135
+ cam_actor.transform(pose)
136
+ vis.add_geometry(cam_actor)
137
+ droid_visualization.cameras[ix] = cam_actor
138
+
139
+
140
+ mask = masks[i].reshape(-1)
141
+ pts = points[i].reshape(-1, 3)[mask].cpu().numpy()
142
+ clr = images[i].reshape(-1, 3)[mask].cpu().numpy()
143
+
144
+ ### add point actor ###
145
+ point_actor = create_point_actor(pts, clr)
146
+ vis.add_geometry(point_actor)
147
+ droid_visualization.points[ix] = point_actor
148
+
149
+ ### Hack to save Point Cloud Data and Camera results ###
150
+
151
+ # Save points
152
+ pcd_points = o3d.geometry.PointCloud()
153
+ for p in droid_visualization.points.items():
154
+ pcd_points += p[1]
155
+ o3d.io.write_point_cloud(f"{save_path}/points.ply", pcd_points, write_ascii=False)
156
+
157
+ # Save pose
158
+ pcd_camera = create_camera_actor(True)
159
+ for c in droid_visualization.cameras.items():
160
+ pcd_camera += c[1]
161
+
162
+ o3d.io.write_line_set(f"{save_path}/camera.ply", pcd_camera, write_ascii=False)
163
+
164
+ ### end ###
165
+
166
+ # hack to allow interacting with visualization during inference
167
+ if len(droid_visualization.cameras) >= droid_visualization.warmup:
168
+ cam = vis.get_view_control().convert_from_pinhole_camera_parameters(cam)
169
+
170
+ droid_visualization.ix += 1
171
+ vis.poll_events()
172
+ vis.update_renderer()
173
+
174
+ ### create Open3D visualization ###
175
+ vis = o3d.visualization.VisualizerWithKeyCallback()
176
+ vis.register_animation_callback(animation_callback)
177
+ vis.register_key_callback(ord("S"), increase_filter)
178
+ vis.register_key_callback(ord("A"), decrease_filter)
179
+
180
+ vis.create_window(height=540, width=960)
181
+ # vis.create_window(height=512, width=384)
182
+ vis.get_render_option().load_from_json("thirdparty/DROID-SLAM/misc/renderoption.json")
183
+
184
+ vis.run()
185
+ vis.destroy_window()
thirdparty/DROID-SLAM/droid_slam/visualization.py ADDED
@@ -0,0 +1,189 @@
1
+ import torch
2
+ import cv2
3
+ import lietorch
4
+ import droid_backends
5
+ import time
6
+ import argparse
7
+ import numpy as np
8
+ import open3d as o3d
9
+
10
+ from lietorch import SE3
11
+ import geom.projective_ops as pops
12
+
13
+ CAM_POINTS = np.array([
14
+ [ 0, 0, 0],
15
+ [-1, -1, 1.5],
16
+ [ 1, -1, 1.5],
17
+ [ 1, 1, 1.5],
18
+ [-1, 1, 1.5],
19
+ [-0.5, 1, 1.5],
20
+ [ 0.5, 1, 1.5],
21
+ [ 0, 1.2, 1.5]])
22
+
23
+ CAM_LINES = np.array([
24
+ [1,2], [2,3], [3,4], [4,1], [1,0], [0,2], [3,0], [0,4], [5,7], [7,6]])
25
+
26
+ def white_balance(img):
27
+ # from https://stackoverflow.com/questions/46390779/automatic-white-balancing-with-grayworld-assumption
28
+ result = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
29
+ avg_a = np.average(result[:, :, 1])
30
+ avg_b = np.average(result[:, :, 2])
31
+ result[:, :, 1] = result[:, :, 1] - ((avg_a - 128) * (result[:, :, 0] / 255.0) * 1.1)
32
+ result[:, :, 2] = result[:, :, 2] - ((avg_b - 128) * (result[:, :, 0] / 255.0) * 1.1)
33
+ result = cv2.cvtColor(result, cv2.COLOR_LAB2BGR)
34
+ return result
35
+
36
+ def create_camera_actor(g, scale=0.05):
37
+ """ build open3d camera polydata """
38
+ camera_actor = o3d.geometry.LineSet(
39
+ points=o3d.utility.Vector3dVector(scale * CAM_POINTS),
40
+ lines=o3d.utility.Vector2iVector(CAM_LINES))
41
+
42
+ color = (g * 1.0, 0.5 * (1-g), 0.9 * (1-g))
43
+ camera_actor.paint_uniform_color(color)
44
+ return camera_actor
45
+
46
+ def create_point_actor(points, colors):
47
+ """ open3d point cloud from numpy array """
48
+ point_cloud = o3d.geometry.PointCloud()
49
+ point_cloud.points = o3d.utility.Vector3dVector(points)
50
+ point_cloud.colors = o3d.utility.Vector3dVector(colors)
51
+ return point_cloud
52
+
53
+ def droid_visualization(video, device="cuda:0"):
54
+ """ DROID visualization frontend """
55
+
56
+ torch.cuda.set_device(device)
57
+ droid_visualization.video = video
58
+ droid_visualization.cameras = {}
59
+ droid_visualization.points = {}
60
+ droid_visualization.warmup = 8
61
+ droid_visualization.scale = 1.0
62
+ droid_visualization.ix = 0
63
+
64
+ droid_visualization.filter_thresh = 0.005
65
+
66
+ def increase_filter(vis):
67
+ droid_visualization.filter_thresh *= 2
68
+ with droid_visualization.video.get_lock():
69
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
70
+
71
+ def decrease_filter(vis):
72
+ droid_visualization.filter_thresh *= 0.5
73
+ with droid_visualization.video.get_lock():
74
+ droid_visualization.video.dirty[:droid_visualization.video.counter.value] = True
75
+
76
+ # file-dialog-based point cloud export
77
+ def export_pointcloud(vis):
78
+ from open3d.visualization import gui  # missing import; kept local so the module still loads on headless installs
+ gui.Application.instance.initialize()
79
+ window = gui.Application.instance.create_window("Export", 350, 600)
80
+
81
+ def _on_filedlg_cancel():
82
+ window.close_dialog()
83
+ window.close()
84
+ gui.Application.instance.quit()
85
+
86
+ def _on_filedlg_done(path):
87
+ pcd_export(path)
88
+ window.close_dialog()
89
+ gui.Application.instance.quit()
90
+
91
+ def exec_file_dialog():
92
+ filedlg = gui.FileDialog(gui.FileDialog.SAVE, "Select file", window.theme)
93
+
94
+ filedlg.add_filter(".ply .xyz .pcd", "PointCloud (.xyz .ply .pcd)")
95
+ filedlg.add_filter("", "All files")
96
+ filedlg.set_on_cancel(_on_filedlg_cancel)
97
+ filedlg.set_on_done(_on_filedlg_done)
98
+ window.show_dialog(filedlg)
99
+
100
+ def pcd_export(path):
101
+ print("\nExporting pointcloud as", path)
102
+ final_pcd = o3d.geometry.PointCloud()
103
+ for p in droid_visualization.points.items():
104
+ final_pcd += p[1]
105
+
106
+ o3d.io.write_point_cloud(path, final_pcd, write_ascii=False)
107
+ #vis.capture_depth_point_cloud("/home/bertuser/droidslam_export.ply")
108
+
109
+ exec_file_dialog()
110
+
111
+ def animation_callback(vis):
112
+ cam = vis.get_view_control().convert_to_pinhole_camera_parameters()
113
+
114
+ with torch.no_grad():
115
+
116
+ with video.get_lock():
117
+ t = video.counter.value
118
+ dirty_index, = torch.where(video.dirty.clone())
119
+ dirty_index = dirty_index
120
+
121
+ if len(dirty_index) == 0:
122
+ return
123
+
124
+ video.dirty[dirty_index] = False
125
+
126
+ # convert poses to 4x4 matrix
127
+ poses = torch.index_select(video.poses, 0, dirty_index)
128
+ disps = torch.index_select(video.disps, 0, dirty_index)
129
+ Ps = SE3(poses).inv().matrix().cpu().numpy()
130
+
131
+ images = torch.index_select(video.images, 0, dirty_index)
132
+ images = images.cpu()[:,[2,1,0],3::8,3::8].permute(0,2,3,1) / 255.0
133
+ points = droid_backends.iproj(SE3(poses).inv().data, disps, video.intrinsics[0]).cpu()
134
+
135
+ thresh = droid_visualization.filter_thresh * torch.ones_like(disps.mean(dim=[1,2]))
136
+
137
+ count = droid_backends.depth_filter(
138
+ video.poses, video.disps, video.intrinsics[0], dirty_index, thresh)
139
+
140
+ count = count.cpu()
141
+ disps = disps.cpu()
142
+ masks = ((count >= 2) & (disps > .5*disps.mean(dim=[1,2], keepdim=True)))
143
+
144
+ for i in range(len(dirty_index)):
145
+ pose = Ps[i]
146
+ ix = dirty_index[i].item()
147
+
148
+ if ix in droid_visualization.cameras:
149
+ vis.remove_geometry(droid_visualization.cameras[ix])
150
+ del droid_visualization.cameras[ix]
151
+
152
+ if ix in droid_visualization.points:
153
+ vis.remove_geometry(droid_visualization.points[ix])
154
+ del droid_visualization.points[ix]
155
+
156
+ ### add camera actor ###
157
+ cam_actor = create_camera_actor(True)
158
+ cam_actor.transform(pose)
159
+ vis.add_geometry(cam_actor)
160
+ droid_visualization.cameras[ix] = cam_actor
161
+
162
+ mask = masks[i].reshape(-1)
163
+ pts = points[i].reshape(-1, 3)[mask].cpu().numpy()
164
+ clr = images[i].reshape(-1, 3)[mask].cpu().numpy()
165
+
166
+ ### add point actor ###
167
+ point_actor = create_point_actor(pts, clr)
168
+ vis.add_geometry(point_actor)
169
+ droid_visualization.points[ix] = point_actor
170
+
171
+ # hack to allow interacting with visualization during inference
172
+ if len(droid_visualization.cameras) >= droid_visualization.warmup:
173
+ cam = vis.get_view_control().convert_from_pinhole_camera_parameters(cam)
174
+
175
+ droid_visualization.ix += 1
176
+ vis.poll_events()
177
+ vis.update_renderer()
178
+
179
+ ### create Open3D visualization ###
180
+ vis = o3d.visualization.VisualizerWithKeyCallback()
181
+ vis.register_animation_callback(animation_callback)
182
+ vis.register_key_callback(ord("S"), increase_filter)
183
+ vis.register_key_callback(ord("A"), decrease_filter)
184
+
185
+ vis.create_window(height=540, width=960)
186
+ vis.get_render_option().load_from_json("misc/renderoption.json")
187
+
188
+ vis.run()
189
+ vis.destroy_window()
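Note: points are only rendered when at least two keyframes agree on their depth (the count returned by droid_backends.depth_filter) and their disparity is not far below the frame mean. The masking rule itself is plain tensor logic; the numbers below are made up.

import torch

count = torch.tensor([[[3, 1], [2, 0]]])                  # hypothetical consistency counts
disps = torch.tensor([[[0.8, 0.9], [0.1, 0.7]]])          # hypothetical disparities
masks = (count >= 2) & (disps > 0.5 * disps.mean(dim=[1, 2], keepdim=True))
print(masks)   # tensor([[[ True, False], [False, False]]])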
thirdparty/DROID-SLAM/environment.yaml ADDED
@@ -0,0 +1,22 @@
1
+ name: droidenv
2
+ channels:
3
+ - rusty1s
4
+ - pytorch
5
+ - open3d-admin
6
+ - nvidia
7
+ - conda-forge
8
+ - defaults
9
+ dependencies:
10
+ - pytorch-scatter
11
+ - torchaudio
12
+ - torchvision
13
+ - open3d
14
+ - pytorch=1.10
15
+ - cudatoolkit=11.3
16
+ - tensorboard
17
+ - scipy
18
+ - opencv
19
+ - tqdm
20
+ - suitesparse
21
+ - matplotlib
22
+ - pyyaml
thirdparty/DROID-SLAM/environment_novis.yaml ADDED
@@ -0,0 +1,20 @@
1
+ name: droidenv
2
+ channels:
3
+ - rusty1s
4
+ - pytorch
5
+ - nvidia
6
+ - conda-forge
7
+ - defaults
8
+ dependencies:
9
+ - pytorch-scatter
10
+ - torchaudio
11
+ - torchvision
12
+ - pytorch=1.10
13
+ - cudatoolkit=11.3
14
+ - tensorboard
15
+ - scipy
16
+ - opencv
17
+ - tqdm
18
+ - suitesparse
19
+ - matplotlib
20
+ - pyyaml
thirdparty/DROID-SLAM/evaluation_scripts/test_eth3d.py ADDED
@@ -0,0 +1,134 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ import torch.nn.functional as F
15
+ from droid import Droid
16
+
17
+ import matplotlib.pyplot as plt
18
+
19
+
20
+ def show_image(image):
21
+ image = image.permute(1, 2, 0).cpu().numpy()
22
+ cv2.imshow('image', image / 255.0)
23
+ cv2.waitKey(1)
24
+
25
+ def image_stream(datapath, use_depth=False, stride=1):
26
+ """ image generator """
27
+
28
+ fx, fy, cx, cy = np.loadtxt(os.path.join(datapath, 'calibration.txt')).tolist()
29
+ image_list = sorted(glob.glob(os.path.join(datapath, 'rgb', '*.png')))[::stride]
30
+ depth_list = sorted(glob.glob(os.path.join(datapath, 'depth', '*.png')))[::stride]
31
+
32
+ for t, (image_file, depth_file) in enumerate(zip(image_list, depth_list)):
33
+ image = cv2.imread(image_file)
34
+ depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH) / 5000.0
35
+
36
+ h0, w0, _ = image.shape
37
+ h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
38
+ w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
39
+
40
+ image = cv2.resize(image, (w1, h1))
41
+ image = image[:h1-h1%8, :w1-w1%8]
42
+ image = torch.as_tensor(image).permute(2, 0, 1)
43
+
44
+ depth = torch.as_tensor(depth)
45
+ depth = F.interpolate(depth[None,None], (h1, w1)).squeeze()
46
+ depth = depth[:h1-h1%8, :w1-w1%8]
47
+
48
+ intrinsics = torch.as_tensor([fx, fy, cx, cy])
49
+ intrinsics[0::2] *= (w1 / w0)
50
+ intrinsics[1::2] *= (h1 / h0)
51
+
52
+ if use_depth:
53
+ yield t, image[None], depth, intrinsics
54
+
55
+ else:
56
+ yield t, image[None], intrinsics
57
+
58
+ if __name__ == '__main__':
59
+ parser = argparse.ArgumentParser()
60
+ parser.add_argument("--datapath")
61
+ parser.add_argument("--weights", default="droid.pth")
62
+ parser.add_argument("--buffer", type=int, default=1024)
63
+ parser.add_argument("--image_size", default=[240, 320])
64
+ parser.add_argument("--disable_vis", action="store_true")
65
+
66
+ parser.add_argument("--beta", type=float, default=0.5)
67
+ parser.add_argument("--filter_thresh", type=float, default=2.0)
68
+ parser.add_argument("--warmup", type=int, default=8)
69
+ parser.add_argument("--keyframe_thresh", type=float, default=3.5)
70
+ parser.add_argument("--frontend_thresh", type=float, default=16.0)
71
+ parser.add_argument("--frontend_window", type=int, default=16)
72
+ parser.add_argument("--frontend_radius", type=int, default=1)
73
+ parser.add_argument("--frontend_nms", type=int, default=0)
74
+
75
+ parser.add_argument("--stereo", action="store_true")
76
+ parser.add_argument("--depth", action="store_true")
77
+
78
+ parser.add_argument("--backend_thresh", type=float, default=22.0)
79
+ parser.add_argument("--backend_radius", type=int, default=2)
80
+ parser.add_argument("--backend_nms", type=int, default=3)
81
+ args = parser.parse_args()
82
+
83
+ torch.multiprocessing.set_start_method('spawn')
84
+
85
+ print("Running evaluation on {}".format(args.datapath))
86
+ print(args)
87
+
88
+ # this can usually be set to 2-3 except for "camera_shake" scenes
89
+ # set to 2 for test scenes
90
+ stride = 1
91
+
92
+ tstamps = []
93
+ for (t, image, depth, intrinsics) in tqdm(image_stream(args.datapath, use_depth=True, stride=stride)):
94
+ if not args.disable_vis:
95
+ show_image(image[0])
96
+
97
+ if t == 0:
98
+ args.image_size = [image.shape[2], image.shape[3]]
99
+ droid = Droid(args)
100
+
101
+ droid.track(t, image, depth, intrinsics=intrinsics)
102
+
103
+ traj_est = droid.terminate(image_stream(args.datapath, use_depth=False, stride=stride))
104
+
105
+ ### run evaluation ###
106
+
107
+ print("#"*20 + " Results...")
108
+
109
+ import evo
110
+ from evo.core.trajectory import PoseTrajectory3D
111
+ from evo.tools import file_interface
112
+ from evo.core import sync
113
+ import evo.main_ape as main_ape
114
+ from evo.core.metrics import PoseRelation
115
+
116
+ image_path = os.path.join(args.datapath, 'rgb')
117
+ images_list = sorted(glob.glob(os.path.join(image_path, '*.png')))[::stride]
118
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
119
+
120
+ traj_est = PoseTrajectory3D(
121
+ positions_xyz=traj_est[:,:3],
122
+ orientations_quat_wxyz=traj_est[:,3:],
123
+ timestamps=np.array(tstamps))
124
+
125
+ gt_file = os.path.join(args.datapath, 'groundtruth.txt')
126
+ traj_ref = file_interface.read_tum_trajectory_file(gt_file)
127
+
128
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
129
+
130
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
131
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=False)
132
+
133
+ print(result.stats)
134
+
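Note: image_stream above resizes each frame so its area is roughly 384x512, crops to a multiple of 8 (the network works on 1/8-resolution grids) and scales the intrinsics by the same resize factors. The arithmetic in isolation, with an illustrative 480x640 input:

import numpy as np

def resized_shape_and_scale(h0, w0, target=384 * 512):
    s = np.sqrt(target / (h0 * w0))
    h1, w1 = int(h0 * s), int(w0 * s)
    scale = (w1 / w0, h1 / h0)                 # applied to (fx, cx) and (fy, cy) respectively
    return (h1 - h1 % 8, w1 - w1 % 8), scale   # cropped shape, intrinsics scale

print(resized_shape_and_scale(480, 640))       # ((384, 512), (0.8, 0.8))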
thirdparty/DROID-SLAM/evaluation_scripts/test_euroc.py ADDED
@@ -0,0 +1,142 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ from torch.multiprocessing import Process
15
+ from droid import Droid
16
+
17
+ import torch.nn.functional as F
18
+
19
+
20
+
21
+ def show_image(image):
22
+ image = image.permute(1, 2, 0).cpu().numpy()
23
+ cv2.imshow('image', image / 255.0)
24
+ cv2.waitKey(1)
25
+
26
+ def image_stream(datapath, image_size=[320, 512], stereo=False, stride=1):
27
+ """ image generator """
28
+
29
+ K_l = np.array([458.654, 0.0, 367.215, 0.0, 457.296, 248.375, 0.0, 0.0, 1.0]).reshape(3,3)
30
+ d_l = np.array([-0.28340811, 0.07395907, 0.00019359, 1.76187114e-05, 0.0])
31
+ R_l = np.array([
32
+ 0.999966347530033, -0.001422739138722922, 0.008079580483432283,
33
+ 0.001365741834644127, 0.9999741760894847, 0.007055629199258132,
34
+ -0.008089410156878961, -0.007044357138835809, 0.9999424675829176
35
+ ]).reshape(3,3)
36
+
37
+ P_l = np.array([435.2046959714599, 0, 367.4517211914062, 0, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4)
38
+ map_l = cv2.initUndistortRectifyMap(K_l, d_l, R_l, P_l[:3,:3], (752, 480), cv2.CV_32F)
39
+
40
+ K_r = np.array([457.587, 0.0, 379.999, 0.0, 456.134, 255.238, 0.0, 0.0, 1]).reshape(3,3)
41
+ d_r = np.array([-0.28368365, 0.07451284, -0.00010473, -3.555907e-05, 0.0]).reshape(5)
42
+ R_r = np.array([
43
+ 0.9999633526194376, -0.003625811871560086, 0.007755443660172947,
44
+ 0.003680398547259526, 0.9999684752771629, -0.007035845251224894,
45
+ -0.007729688520722713, 0.007064130529506649, 0.999945173484644
46
+ ]).reshape(3,3)
47
+
48
+ P_r = np.array([435.2046959714599, 0, 367.4517211914062, -47.90639384423901, 0, 435.2046959714599, 252.2008514404297, 0, 0, 0, 1, 0]).reshape(3,4)
49
+ map_r = cv2.initUndistortRectifyMap(K_r, d_r, R_r, P_r[:3,:3], (752, 480), cv2.CV_32F)
50
+
51
+ intrinsics_vec = [435.2046959714599, 435.2046959714599, 367.4517211914062, 252.2008514404297]
52
+ ht0, wd0 = [480, 752]
53
+
54
+ # read all png images in folder
55
+ images_left = sorted(glob.glob(os.path.join(datapath, 'mav0/cam0/data/*.png')))[::stride]
56
+ images_right = [x.replace('cam0', 'cam1') for x in images_left]
57
+
58
+ for t, (imgL, imgR) in enumerate(zip(images_left, images_right)):
59
+ if stereo and not os.path.isfile(imgR):
60
+ continue
61
+ tstamp = float(imgL.split('/')[-1][:-4])
62
+ images = [cv2.remap(cv2.imread(imgL), map_l[0], map_l[1], interpolation=cv2.INTER_LINEAR)]
63
+ if stereo:
64
+ images += [cv2.remap(cv2.imread(imgR), map_r[0], map_r[1], interpolation=cv2.INTER_LINEAR)]
65
+
66
+ images = torch.from_numpy(np.stack(images, 0))
67
+ images = images.permute(0, 3, 1, 2).to("cuda:0", dtype=torch.float32)
68
+ images = F.interpolate(images, image_size, mode="bilinear", align_corners=False)
69
+
70
+ intrinsics = torch.as_tensor(intrinsics_vec).cuda()
71
+ intrinsics[0] *= image_size[1] / wd0
72
+ intrinsics[1] *= image_size[0] / ht0
73
+ intrinsics[2] *= image_size[1] / wd0
74
+ intrinsics[3] *= image_size[0] / ht0
75
+
76
+ yield stride*t, images, intrinsics
77
+
78
+
79
+ if __name__ == '__main__':
80
+ parser = argparse.ArgumentParser()
81
+ parser.add_argument("--datapath", help="path to euroc sequence")
82
+ parser.add_argument("--gt", help="path to gt file")
83
+ parser.add_argument("--weights", default="droid.pth")
84
+ parser.add_argument("--buffer", type=int, default=512)
85
+ parser.add_argument("--image_size", default=[320,512])
86
+ parser.add_argument("--disable_vis", action="store_true")
87
+ parser.add_argument("--stereo", action="store_true")
88
+
89
+ parser.add_argument("--beta", type=float, default=0.3)
90
+ parser.add_argument("--filter_thresh", type=float, default=2.4)
91
+ parser.add_argument("--warmup", type=int, default=15)
92
+ parser.add_argument("--keyframe_thresh", type=float, default=3.5)
93
+ parser.add_argument("--frontend_thresh", type=float, default=17.5)
94
+ parser.add_argument("--frontend_window", type=int, default=20)
95
+ parser.add_argument("--frontend_radius", type=int, default=2)
96
+ parser.add_argument("--frontend_nms", type=int, default=1)
97
+
98
+ parser.add_argument("--backend_thresh", type=float, default=24.0)
99
+ parser.add_argument("--backend_radius", type=int, default=2)
100
+ parser.add_argument("--backend_nms", type=int, default=2)
101
+ args = parser.parse_args()
102
+
103
+ torch.multiprocessing.set_start_method('spawn')
104
+
105
+ print("Running evaluation on {}".format(args.datapath))
106
+ print(args)
107
+
108
+ droid = Droid(args)
109
+ time.sleep(5)
110
+
111
+ for (t, image, intrinsics) in tqdm(image_stream(args.datapath, stereo=args.stereo, stride=2)):
112
+ droid.track(t, image, intrinsics=intrinsics)
113
+
114
+ traj_est = droid.terminate(image_stream(args.datapath, stride=1))
115
+
116
+ ### run evaluation ###
117
+
118
+ import evo
119
+ from evo.core.trajectory import PoseTrajectory3D
120
+ from evo.tools import file_interface
121
+ from evo.core import sync
122
+ import evo.main_ape as main_ape
123
+ from evo.core.metrics import PoseRelation
124
+
125
+ images_list = sorted(glob.glob(os.path.join(args.datapath, 'mav0/cam0/data/*.png')))
126
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
127
+
128
+ traj_est = PoseTrajectory3D(
129
+ positions_xyz=1.10 * traj_est[:,:3],
130
+ orientations_quat_wxyz=traj_est[:,3:],
131
+ timestamps=np.array(tstamps))
132
+
133
+ traj_ref = file_interface.read_tum_trajectory_file(args.gt)
134
+
135
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
136
+
137
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
138
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)
139
+
140
+ print(result)
141
+
142
+
thirdparty/DROID-SLAM/evaluation_scripts/test_tum.py ADDED
@@ -0,0 +1,123 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+
4
+ from tqdm import tqdm
5
+ import numpy as np
6
+ import torch
7
+ import lietorch
8
+ import cv2
9
+ import os
10
+ import glob
11
+ import time
12
+ import argparse
13
+
14
+ import torch.nn.functional as F
15
+ from droid import Droid
16
+
17
+
18
+ def show_image(image):
19
+ image = image.permute(1, 2, 0).cpu().numpy()
20
+ cv2.imshow('image', image / 255.0)
21
+ cv2.waitKey(1)
22
+
23
+ def image_stream(datapath, image_size=[320, 512]):
24
+ """ image generator """
25
+
26
+ fx, fy, cx, cy = 517.3, 516.5, 318.6, 255.3
27
+
28
+ K_l = np.array([fx, 0.0, cx, 0.0, fy, cy, 0.0, 0.0, 1.0]).reshape(3,3)
29
+ d_l = np.array([0.2624, -0.9531, -0.0054, 0.0026, 1.1633])
30
+
31
+ # read all png images in folder
32
+ images_list = sorted(glob.glob(os.path.join(datapath, 'rgb', '*.png')))[::2]
33
+
34
+ for t, imfile in enumerate(images_list):
35
+ image = cv2.imread(imfile)
36
+ ht0, wd0, _ = image.shape
37
+ image = cv2.undistort(image, K_l, d_l)
38
+ image = cv2.resize(image, (320+32, 240+16))
39
+ image = torch.from_numpy(image).permute(2,0,1)
40
+
41
+ intrinsics = torch.as_tensor([fx, fy, cx, cy]).cuda()
42
+ intrinsics[0] *= image.shape[2] / 640.0
43
+ intrinsics[1] *= image.shape[1] / 480.0
44
+ intrinsics[2] *= image.shape[2] / 640.0
45
+ intrinsics[3] *= image.shape[1] / 480.0
46
+
47
+ # crop image to remove distortion boundary
48
+ intrinsics[2] -= 16
49
+ intrinsics[3] -= 8
50
+ image = image[:, 8:-8, 16:-16]
51
+
52
+ yield t, image[None], intrinsics
53
+
54
+ if __name__ == '__main__':
55
+ parser = argparse.ArgumentParser()
56
+ parser.add_argument("--datapath")
57
+ parser.add_argument("--weights", default="droid.pth")
58
+ parser.add_argument("--buffer", type=int, default=512)
59
+ parser.add_argument("--image_size", default=[240, 320])
60
+ parser.add_argument("--disable_vis", action="store_true")
61
+
62
+ parser.add_argument("--beta", type=float, default=0.6)
63
+ parser.add_argument("--filter_thresh", type=float, default=1.75)
64
+ parser.add_argument("--warmup", type=int, default=12)
65
+ parser.add_argument("--keyframe_thresh", type=float, default=2.25)
66
+ parser.add_argument("--frontend_thresh", type=float, default=12.0)
67
+ parser.add_argument("--frontend_window", type=int, default=25)
68
+ parser.add_argument("--frontend_radius", type=int, default=2)
69
+ parser.add_argument("--frontend_nms", type=int, default=1)
70
+
71
+ parser.add_argument("--backend_thresh", type=float, default=15.0)
72
+ parser.add_argument("--backend_radius", type=int, default=2)
73
+ parser.add_argument("--backend_nms", type=int, default=3)
74
+ args = parser.parse_args()
75
+
76
+ args.stereo = False
77
+ torch.multiprocessing.set_start_method('spawn')
78
+
79
+ print("Running evaluation on {}".format(args.datapath))
80
+ print(args)
81
+
82
+ droid = Droid(args)
83
+ time.sleep(5)
84
+
85
+ tstamps = []
86
+ for (t, image, intrinsics) in tqdm(image_stream(args.datapath)):
87
+ if not args.disable_vis:
88
+ show_image(image)
89
+ droid.track(t, image, intrinsics=intrinsics)
90
+
91
+
92
+ traj_est = droid.terminate(image_stream(args.datapath))
93
+
94
+ ### run evaluation ###
95
+
96
+ print("#"*20 + " Results...")
97
+
98
+ import evo
99
+ from evo.core.trajectory import PoseTrajectory3D
100
+ from evo.tools import file_interface
101
+ from evo.core import sync
102
+ import evo.main_ape as main_ape
103
+ from evo.core.metrics import PoseRelation
104
+
105
+ image_path = os.path.join(args.datapath, 'rgb')
106
+ images_list = sorted(glob.glob(os.path.join(image_path, '*.png')))[::2]
107
+ tstamps = [float(x.split('/')[-1][:-4]) for x in images_list]
108
+
109
+ traj_est = PoseTrajectory3D(
110
+ positions_xyz=traj_est[:,:3],
111
+ orientations_quat_wxyz=traj_est[:,3:],
112
+ timestamps=np.array(tstamps))
113
+
114
+ gt_file = os.path.join(args.datapath, 'groundtruth.txt')
115
+ traj_ref = file_interface.read_tum_trajectory_file(gt_file)
116
+
117
+ traj_ref, traj_est = sync.associate_trajectories(traj_ref, traj_est)
118
+ result = main_ape.ape(traj_ref, traj_est, est_name='traj',
119
+ pose_relation=PoseRelation.translation_part, align=True, correct_scale=True)
120
+
121
+
122
+ print(result)
123
+
thirdparty/DROID-SLAM/evaluation_scripts/validate_tartanair.py ADDED
@@ -0,0 +1,115 @@
1
+ import sys
2
+ sys.path.append('droid_slam')
3
+ sys.path.append('thirdparty/tartanair_tools')
4
+
5
+ from tqdm import tqdm
6
+ import numpy as np
7
+ import torch
8
+ import lietorch
9
+ import cv2
10
+ import os
11
+ import glob
12
+ import time
13
+ import yaml
14
+ import argparse
15
+
16
+ from droid import Droid
17
+
18
+ def image_stream(datapath, image_size=[384, 512], intrinsics_vec=[320.0, 320.0, 320.0, 240.0], stereo=False):
19
+ """ image generator """
20
+
21
+ # read all png images in folder
22
+ ht0, wd0 = [480, 640]
23
+ images_left = sorted(glob.glob(os.path.join(datapath, 'image_left/*.png')))
24
+ images_right = sorted(glob.glob(os.path.join(datapath, 'image_right/*.png')))
25
+
26
+ data = []
27
+ for t in range(len(images_left)):
28
+ images = [ cv2.resize(cv2.imread(images_left[t]), (image_size[1], image_size[0])) ]
29
+ if stereo:
30
+ images += [ cv2.resize(cv2.imread(images_right[t]), (image_size[1], image_size[0])) ]
31
+
32
+ images = torch.from_numpy(np.stack(images, 0)).permute(0,3,1,2)
33
+ intrinsics = .8 * torch.as_tensor(intrinsics_vec)
34
+
35
+ data.append((t, images, intrinsics))
36
+
37
+ return data
38
+
39
+
40
+ if __name__ == '__main__':
41
+ parser = argparse.ArgumentParser()
42
+ parser.add_argument("--datapath", default="datasets/TartanAir")
43
+ parser.add_argument("--weights", default="droid.pth")
44
+ parser.add_argument("--buffer", type=int, default=1000)
45
+ parser.add_argument("--image_size", default=[384,512])
46
+ parser.add_argument("--stereo", action="store_true")
47
+ parser.add_argument("--disable_vis", action="store_true")
48
+ parser.add_argument("--plot_curve", action="store_true")
49
+ parser.add_argument("--id", type=int, default=-1)
50
+
51
+ parser.add_argument("--beta", type=float, default=0.3)
52
+ parser.add_argument("--filter_thresh", type=float, default=2.4)
53
+ parser.add_argument("--warmup", type=int, default=12)
54
+ parser.add_argument("--keyframe_thresh", type=float, default=3.5)
55
+ parser.add_argument("--frontend_thresh", type=float, default=15)
56
+ parser.add_argument("--frontend_window", type=int, default=20)
57
+ parser.add_argument("--frontend_radius", type=int, default=1)
58
+ parser.add_argument("--frontend_nms", type=int, default=1)
59
+
60
+ parser.add_argument("--backend_thresh", type=float, default=20.0)
61
+ parser.add_argument("--backend_radius", type=int, default=2)
62
+ parser.add_argument("--backend_nms", type=int, default=3)
63
+
64
+ args = parser.parse_args()
65
+ torch.multiprocessing.set_start_method('spawn')
66
+
67
+ from data_readers.tartan import test_split
68
+ from evaluation.tartanair_evaluator import TartanAirEvaluator
69
+
70
+ if not os.path.isdir("figures"):
71
+ os.mkdir("figures")
72
+
73
+ if args.id >= 0:
74
+ test_split = [ test_split[args.id] ]
75
+
76
+ ate_list = []
77
+ for scene in test_split:
78
+ print("Performing evaluation on {}".format(scene))
79
+ torch.cuda.empty_cache()
80
+ droid = Droid(args)
81
+
82
+ scenedir = os.path.join(args.datapath, scene)
83
+
84
+ for (tstamp, image, intrinsics) in tqdm(image_stream(scenedir, stereo=args.stereo)):
85
+ droid.track(tstamp, image, intrinsics=intrinsics)
86
+
87
+ # fill in non-keyframe poses + global BA
88
+ traj_est = droid.terminate(image_stream(scenedir))
89
+
90
+ ### do evaluation ###
91
+ evaluator = TartanAirEvaluator()
92
+ gt_file = os.path.join(scenedir, "pose_left.txt")
93
+ traj_ref = np.loadtxt(gt_file, delimiter=' ')[:, [1, 2, 0, 4, 5, 3, 6]] # ned -> xyz
94
+
95
+ # usually stereo should not be scale corrected, but we are comparing monocular and stereo here
96
+ results = evaluator.evaluate_one_trajectory(
97
+ traj_ref, traj_est, scale=True, title=scenedir[-20:].replace('/', '_'))
98
+
99
+ print(results)
100
+ ate_list.append(results["ate_score"])
101
+
102
+ print("Results")
103
+ print(ate_list)
104
+
105
+ if args.plot_curve:
106
+ import matplotlib.pyplot as plt
107
+ ate = np.array(ate_list)
108
+ xs = np.linspace(0.0, 1.0, 512)
109
+ ys = [np.count_nonzero(ate < t) / ate.shape[0] for t in xs]
110
+
111
+ plt.plot(xs, ys)
112
+ plt.xlabel("ATE [m]")
113
+ plt.ylabel("% runs")
114
+ plt.show()
115
+
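Note: the --plot_curve branch plots, for each error threshold, the fraction of runs whose ATE falls below it. The same statistic without matplotlib, using made-up ATE scores:

import numpy as np

ate = np.array([0.08, 0.15, 0.32, 0.05])               # hypothetical per-scene ATE scores [m]
thresholds = np.linspace(0.0, 1.0, 512)
success = [np.count_nonzero(ate < t) / ate.shape[0] for t in thresholds]
print(success[0], success[-1])                         # 0.0 at t=0.0, 1.0 once t exceeds every score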
thirdparty/DROID-SLAM/misc/DROID.png ADDED

Git LFS Details

  • SHA256: 99fb33606ad6ea92b4512bd843c65b5db654734d570cc5787df467df4c9d8faf
  • Pointer size: 131 Bytes
  • Size of remote file: 745 kB
thirdparty/DROID-SLAM/misc/renderoption.json ADDED
@@ -0,0 +1,40 @@
+{
+    "background_color" : [ 1, 1, 1 ],
+    "class_name" : "RenderOption",
+    "default_mesh_color" : [ 0.69999999999999996, 0.69999999999999996, 0.69999999999999996 ],
+    "image_max_depth" : 3000,
+    "image_stretch_option" : 0,
+    "interpolation_option" : 0,
+    "light0_color" : [ 1, 1, 1 ],
+    "light0_diffuse_power" : 20,
+    "light0_position" : [ 0, 0, 20 ],
+    "light0_specular_power" : 2.20000000000000001,
+    "light0_specular_shininess" : 100,
+    "light1_color" : [ 1, 1, 1 ],
+    "light1_diffuse_power" : 0.66000000000000003,
+    "light1_position" : [ 0, 0, 2 ],
+    "light1_specular_power" : 2.20000000000000001,
+    "light1_specular_shininess" : 100,
+    "light2_color" : [ 1, 1, 1 ],
+    "light2_diffuse_power" : 20,
+    "light2_position" : [ 0, 0, -20 ],
+    "light2_specular_power" : 2.20000000000000001,
+    "light2_specular_shininess" : 100,
+    "light3_color" : [ 1, 1, 1 ],
+    "light3_diffuse_power" : 20,
+    "light3_position" : [ 0, 0, -20 ],
+    "light3_specular_power" : 2.20000000000000001,
+    "light3_specular_shininess" : 100,
+    "light_ambient_color" : [ 0, 0, 0 ],
+    "light_on" : true,
+    "mesh_color_option" : 1,
+    "mesh_shade_option" : 0,
+    "mesh_show_back_face" : false,
+    "mesh_show_wireframe" : false,
+    "point_color_option" : 7,
+    "point_show_normal" : false,
+    "point_size" : 2,
+    "show_coordinate_frame" : false,
+    "version_major" : 1,
+    "version_minor" : 0
+}
thirdparty/DROID-SLAM/misc/screenshot.png ADDED

Git LFS Details

  • SHA256: b8bb7761f678a743bf6a5b8af137c9d624e44a7d0f1111acf602823278a0529a
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
thirdparty/DROID-SLAM/setup.py ADDED
@@ -0,0 +1,61 @@
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+import os.path as osp
+ROOT = osp.dirname(osp.abspath(__file__))
+
+setup(
+    name='droid_backends',
+    ext_modules=[
+        CUDAExtension('droid_backends',
+            include_dirs=[osp.join(ROOT, 'thirdparty/eigen')],
+            sources=[
+                'src/droid.cpp',
+                'src/droid_kernels.cu',
+                'src/correlation_kernels.cu',
+                'src/altcorr_kernel.cu',
+            ],
+            extra_compile_args={
+                'cxx': ['-O3'],
+                'nvcc': ['-O3',
+                    '-gencode=arch=compute_60,code=sm_60',
+                    '-gencode=arch=compute_61,code=sm_61',
+                    '-gencode=arch=compute_70,code=sm_70',
+                    '-gencode=arch=compute_75,code=sm_75',
+                    '-gencode=arch=compute_80,code=sm_80',
+                    '-gencode=arch=compute_86,code=sm_86',
+                ]
+            }),
+    ],
+    cmdclass={ 'build_ext' : BuildExtension }
+)
+
+setup(
+    name='lietorch',
+    version='0.2',
+    description='Lie Groups for PyTorch',
+    packages=['lietorch'],
+    package_dir={'': 'thirdparty/lietorch'},
+    ext_modules=[
+        CUDAExtension('lietorch_backends',
+            include_dirs=[
+                osp.join(ROOT, 'thirdparty/lietorch/lietorch/include'),
+                osp.join(ROOT, 'thirdparty/eigen')],
+            sources=[
+                'thirdparty/lietorch/lietorch/src/lietorch.cpp',
+                'thirdparty/lietorch/lietorch/src/lietorch_gpu.cu',
+                'thirdparty/lietorch/lietorch/src/lietorch_cpu.cpp'],
+            extra_compile_args={
+                'cxx': ['-O2'],
+                'nvcc': ['-O2',
+                    '-gencode=arch=compute_60,code=sm_60',
+                    '-gencode=arch=compute_61,code=sm_61',
+                    '-gencode=arch=compute_70,code=sm_70',
+                    '-gencode=arch=compute_75,code=sm_75',
+                    '-gencode=arch=compute_80,code=sm_80',
+                    '-gencode=arch=compute_86,code=sm_86',
+                ]
+            }),
+    ],
+    cmdclass={ 'build_ext' : BuildExtension }
+)
thirdparty/DROID-SLAM/src/altcorr_kernel.cu ADDED
@@ -0,0 +1,356 @@
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <vector>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+
+
+#include <ATen/ATen.h>
+#include <ATen/NativeFunctions.h>
+#include <ATen/cuda/CUDAApplyUtils.cuh>
+#include <ATen/native/cuda/KernelUtils.cuh>
+
+
+
+#define BLOCK_H 4
+#define BLOCK_W 8
+#define BLOCK_HW BLOCK_H * BLOCK_W
+#define CHANNEL_STRIDE 32
+
+
+__forceinline__ __device__
+bool within_bounds(int h, int w, int H, int W) {
+  return h >= 0 && h < H && w >= 0 && w < W;
+}
+
+template <typename scalar_t>
+__global__ void altcorr_forward_kernel(
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1,
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2,
+  const torch::PackedTensorAccessor32<float,5,torch::RestrictPtrTraits> coords,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr,
+  int r)
+{
+  const int b = blockIdx.x;
+  const int h0 = blockIdx.y * blockDim.x;
+  const int w0 = blockIdx.z * blockDim.y;
+  const int tid = threadIdx.x * blockDim.y + threadIdx.y;
+
+  const int H1 = fmap1.size(1);
+  const int W1 = fmap1.size(2);
+  const int H2 = fmap2.size(1);
+  const int W2 = fmap2.size(2);
+  const int N = coords.size(1);
+  const int C = fmap1.size(3);
+
+  __shared__ scalar_t f1[CHANNEL_STRIDE][BLOCK_HW];
+  __shared__ scalar_t f2[CHANNEL_STRIDE][BLOCK_HW];
+
+  __shared__ float x2s[BLOCK_HW];
+  __shared__ float y2s[BLOCK_HW];
+
+  for (int c=0; c<C; c+=CHANNEL_STRIDE) {
+    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+      int k1 = k + tid / CHANNEL_STRIDE;
+      int h1 = h0 + k1 / BLOCK_W;
+      int w1 = w0 + k1 % BLOCK_W;
+      int c1 = tid % CHANNEL_STRIDE;
+
+      if (within_bounds(h1, w1, H1, W1))
+        f1[c1][k1] = fmap1[b][h1][w1][c+c1];
+
+      else
+        f1[c1][k1] = 0.0;
+    }
+
+    __syncthreads();
+
+    for (int n=0; n<N; n++) {
+      int h1 = h0 + threadIdx.x;
+      int w1 = w0 + threadIdx.y;
+      if (within_bounds(h1, w1, H1, W1)) {
+        x2s[tid] = coords[b][n][h1][w1][0];
+        y2s[tid] = coords[b][n][h1][w1][1];
+      }
+
+      float dx = x2s[tid] - floor(x2s[tid]);
+      float dy = y2s[tid] - floor(y2s[tid]);
+
+      int rd = 2*r + 1;
+      for (int iy=0; iy<rd+1; iy++) {
+        for (int ix=0; ix<rd+1; ix++) {
+          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+            int k1 = k + tid / CHANNEL_STRIDE;
+            int h2 = static_cast<int>(floor(y2s[k1])) - r + iy;
+            int w2 = static_cast<int>(floor(x2s[k1])) - r + ix;
+            int c2 = tid % CHANNEL_STRIDE;
+
+            if (within_bounds(h2, w2, H2, W2))
+              f2[c2][k1] = fmap2[b][h2][w2][c+c2];
+
+            else
+              f2[c2][k1] = static_cast<scalar_t>(0.0);
+          }
+
+          __syncthreads();
+
+          scalar_t s = 0.0;
+          for (int k=0; k<CHANNEL_STRIDE; k++)
+            s += f1[k][tid] * f2[k][tid];
+
+          int ix_nw = H1*W1*((iy-1) + rd*(ix-1));
+          int ix_ne = H1*W1*((iy-1) + rd*ix);
+          int ix_sw = H1*W1*(iy + rd*(ix-1));
+          int ix_se = H1*W1*(iy + rd*ix);
+
+          // int ix_nw = ((iy-1) + rd*(ix-1));
+          // int ix_ne = ((iy-1) + rd*ix);
+          // int ix_sw = (iy + rd*(ix-1));
+          // int ix_se = (iy + rd*ix);
+
+          scalar_t nw = s * static_cast<scalar_t>((dy) * (dx));
+          scalar_t ne = s * static_cast<scalar_t>((dy) * (1-dx));
+          scalar_t sw = s * static_cast<scalar_t>((1-dy) * (dx));
+          scalar_t se = s * static_cast<scalar_t>((1-dy) * (1-dx));
+
+          // if (iy > 0 && ix > 0 && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_nw][h1][w1] += nw;
+
+          // if (iy > 0 && ix < rd && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_ne][h1][w1] += ne;
+
+          // if (iy < rd && ix > 0 && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_sw][h1][w1] += sw;
+
+          // if (iy < rd && ix < rd && within_bounds(h1, w1, H1, W1))
+          //   corr[b][n][ix_se][h1][w1] += se;
+
+
+          scalar_t* corr_ptr = &corr[b][n][0][h1][w1];
+
+          if (iy > 0 && ix > 0 && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_nw) += nw;
+
+          if (iy > 0 && ix < rd && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_ne) += ne;
+
+          if (iy < rd && ix > 0 && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_sw) += sw;
+
+          if (iy < rd && ix < rd && within_bounds(h1, w1, H1, W1))
+            *(corr_ptr + ix_se) += se;
+
+
+        }
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void altcorr_backward_kernel(
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1,
+  const torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2,
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> coords,
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr_grad,
+  torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap1_grad,
+  torch::PackedTensorAccessor32<scalar_t,4,torch::RestrictPtrTraits> fmap2_grad,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> coords_grad,
+  int r)
+{
+
+  const int b = blockIdx.x;
+  const int h0 = blockIdx.y * blockDim.x;
+  const int w0 = blockIdx.z * blockDim.y;
+  const int tid = threadIdx.x * blockDim.y + threadIdx.y;
+
+  const int H1 = fmap1.size(1);
+  const int W1 = fmap1.size(2);
+  const int H2 = fmap2.size(1);
+  const int W2 = fmap2.size(2);
+  const int N = coords.size(1);
+  const int C = fmap1.size(3);
+
+  __shared__ scalar_t f1[CHANNEL_STRIDE][BLOCK_HW+1];
+  __shared__ scalar_t f2[CHANNEL_STRIDE][BLOCK_HW+1];
+
+  __shared__ scalar_t f1_grad[CHANNEL_STRIDE][BLOCK_HW+1];
+  __shared__ scalar_t f2_grad[CHANNEL_STRIDE][BLOCK_HW+1];
+
+  __shared__ scalar_t x2s[BLOCK_HW];
+  __shared__ scalar_t y2s[BLOCK_HW];
+
+  for (int c=0; c<C; c+=CHANNEL_STRIDE) {
+
+    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+      int k1 = k + tid / CHANNEL_STRIDE;
+      int h1 = h0 + k1 / BLOCK_W;
+      int w1 = w0 + k1 % BLOCK_W;
+      int c1 = tid % CHANNEL_STRIDE;
+
+      auto fptr = fmap1[b][h1][w1];
+      if (within_bounds(h1, w1, H1, W1))
+        f1[c1][k1] = fptr[c+c1];
+      else
+        f1[c1][k1] = 0.0;
+
+      f1_grad[c1][k1] = 0.0;
+    }
+
+    __syncthreads();
+
+    int h1 = h0 + threadIdx.x;
+    int w1 = w0 + threadIdx.y;
+
+    for (int n=0; n<N; n++) {
+      x2s[tid] = coords[b][n][h1][w1][0];
+      y2s[tid] = coords[b][n][h1][w1][1];
+
+      scalar_t dx = x2s[tid] - floor(x2s[tid]);
+      scalar_t dy = y2s[tid] - floor(y2s[tid]);
+
+      int rd = 2*r + 1;
+      for (int iy=0; iy<rd+1; iy++) {
+        for (int ix=0; ix<rd+1; ix++) {
+          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+            int k1 = k + tid / CHANNEL_STRIDE;
+            int h2 = static_cast<int>(floor(y2s[k1]))-r+iy;
+            int w2 = static_cast<int>(floor(x2s[k1]))-r+ix;
+            int c2 = tid % CHANNEL_STRIDE;
+
+            auto fptr = fmap2[b][h2][w2];
+            if (within_bounds(h2, w2, H2, W2))
+              f2[c2][k1] = fptr[c+c2];
+            else
+              f2[c2][k1] = 0.0;
+
+            f2_grad[c2][k1] = 0.0;
+          }
+
+          __syncthreads();
+
+          const scalar_t* grad_ptr = &corr_grad[b][n][0][h1][w1];
+          scalar_t g = 0.0;
+
+          int ix_nw = H1*W1*((iy-1) + rd*(ix-1));
+          int ix_ne = H1*W1*((iy-1) + rd*ix);
+          int ix_sw = H1*W1*(iy + rd*(ix-1));
+          int ix_se = H1*W1*(iy + rd*ix);
+
+          if (iy > 0 && ix > 0 && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_nw) * dy * dx;
+
+          if (iy > 0 && ix < rd && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_ne) * dy * (1-dx);
+
+          if (iy < rd && ix > 0 && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_sw) * (1-dy) * dx;
+
+          if (iy < rd && ix < rd && within_bounds(h1, w1, H1, W1))
+            g += *(grad_ptr + ix_se) * (1-dy) * (1-dx);
+
+          for (int k=0; k<CHANNEL_STRIDE; k++) {
+            f1_grad[k][tid] += g * f2[k][tid];
+            f2_grad[k][tid] += g * f1[k][tid];
+          }
+
+          for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+            int k1 = k + tid / CHANNEL_STRIDE;
+            int h2 = static_cast<int>(floor(y2s[k1]))-r+iy;
+            int w2 = static_cast<int>(floor(x2s[k1]))-r+ix;
+            int c2 = tid % CHANNEL_STRIDE;
+
+            scalar_t* fptr = &fmap2_grad[b][h2][w2][0];
+            if (within_bounds(h2, w2, H2, W2))
+              atomicAdd(fptr+c+c2, f2_grad[c2][k1]);
+          }
+        }
+      }
+    }
+    __syncthreads();
+
+
+    for (int k=0; k<BLOCK_HW; k+=BLOCK_HW/CHANNEL_STRIDE) {
+      int k1 = k + tid / CHANNEL_STRIDE;
+      int h1 = h0 + k1 / BLOCK_W;
+      int w1 = w0 + k1 % BLOCK_W;
+      int c1 = tid % CHANNEL_STRIDE;
+
+      scalar_t* fptr = &fmap1_grad[b][h1][w1][0];
+      if (within_bounds(h1, w1, H1, W1))
+        fptr[c+c1] += f1_grad[c1][k1];
+    }
+  }
+}
+
+
+
+std::vector<torch::Tensor> altcorr_cuda_forward(
+  torch::Tensor fmap1,
+  torch::Tensor fmap2,
+  torch::Tensor coords,
+  int radius)
+{
+  const auto B = coords.size(0);
+  const auto N = coords.size(1);
+  const auto H = coords.size(2);
+  const auto W = coords.size(3);
+
+  const auto rd = 2 * radius + 1;
+  auto opts = fmap1.options();
+  auto corr = torch::zeros({B, N, rd*rd, H, W}, opts);
+
+  const dim3 blocks(B, (H+BLOCK_H-1)/BLOCK_H, (W+BLOCK_W-1)/BLOCK_W);
+  const dim3 threads(BLOCK_H, BLOCK_W);
+
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(fmap1.type(), "altcorr_forward_kernel", ([&] {
+    altcorr_forward_kernel<scalar_t><<<blocks, threads>>>(
+      fmap1.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),
+      fmap2.packed_accessor32<scalar_t,4,torch::RestrictPtrTraits>(),
+      coords.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+      corr.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      radius);
+  }));
+
+  return {corr};
+}
+
+std::vector<torch::Tensor> altcorr_cuda_backward(
+  torch::Tensor fmap1,
+  torch::Tensor fmap2,
+  torch::Tensor coords,
+  torch::Tensor corr_grad,
+  int radius)
+{
+  const auto B = coords.size(0);
+  const auto N = coords.size(1);
+
+  const auto H1 = fmap1.size(1);
+  const auto W1 = fmap1.size(2);
+  const auto H2 = fmap2.size(1);
+  const auto W2 = fmap2.size(2);
+  const auto C = fmap1.size(3);
+
+  auto opts = fmap1.options();
+  auto fmap1_grad = torch::zeros({B, H1, W1, C}, opts);
+  auto fmap2_grad = torch::zeros({B, H2, W2, C}, opts);
+  auto coords_grad = torch::zeros({B, N, H1, W1, 2}, opts);
+
+  const dim3 blocks(B, (H1+BLOCK_H-1)/BLOCK_H, (W1+BLOCK_W-1)/BLOCK_W);
+  const dim3 threads(BLOCK_H, BLOCK_W);
+
+  altcorr_backward_kernel<float><<<blocks, threads>>>(
+    fmap1.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    fmap2.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    coords.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+    corr_grad.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+    fmap1_grad.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    fmap2_grad.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+    coords_grad.packed_accessor32<float,5,torch::RestrictPtrTraits>(),
+    radius);
+
+  return {fmap1_grad, fmap2_grad, coords_grad};
+}
thirdparty/DROID-SLAM/src/correlation_kernels.cu ADDED
@@ -0,0 +1,185 @@
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <vector>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+
+
+#include <ATen/ATen.h>
+#include <ATen/NativeFunctions.h>
+#include <ATen/Parallel.h>
+
+#define BLOCK 16
+
+__forceinline__ __device__ bool within_bounds(int h, int w, int H, int W) {
+  return h >= 0 && h < H && w >= 0 && w < W;
+}
+
+template <typename scalar_t>
+__global__ void corr_index_forward_kernel(
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> volume,
+  const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr,
+  int r)
+{
+  // batch index
+  const int x = blockIdx.x * blockDim.x + threadIdx.x;
+  const int y = blockIdx.y * blockDim.y + threadIdx.y;
+  const int n = blockIdx.z;
+
+  const int h1 = volume.size(1);
+  const int w1 = volume.size(2);
+  const int h2 = volume.size(3);
+  const int w2 = volume.size(4);
+
+  if (!within_bounds(y, x, h1, w1)) {
+    return;
+  }
+
+  float x0 = coords[n][0][y][x];
+  float y0 = coords[n][1][y][x];
+
+  float dx = x0 - floor(x0);
+  float dy = y0 - floor(y0);
+
+  int rd = 2*r + 1;
+  for (int i=0; i<rd+1; i++) {
+    for (int j=0; j<rd+1; j++) {
+      int x1 = static_cast<int>(floor(x0)) - r + i;
+      int y1 = static_cast<int>(floor(y0)) - r + j;
+
+      if (within_bounds(y1, x1, h2, w2)) {
+        scalar_t s = volume[n][y][x][y1][x1];
+
+        if (i > 0 && j > 0)
+          corr[n][i-1][j-1][y][x] += s * scalar_t(dx * dy);
+
+        if (i > 0 && j < rd)
+          corr[n][i-1][j][y][x] += s * scalar_t(dx * (1.0f-dy));
+
+        if (i < rd && j > 0)
+          corr[n][i][j-1][y][x] += s * scalar_t((1.0f-dx) * dy);
+
+        if (i < rd && j < rd)
+          corr[n][i][j][y][x] += s * scalar_t((1.0f-dx) * (1.0f-dy));
+
+      }
+    }
+  }
+}
+
+
+template <typename scalar_t>
+__global__ void corr_index_backward_kernel(
+  const torch::PackedTensorAccessor32<float,4,torch::RestrictPtrTraits> coords,
+  const torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> corr_grad,
+  torch::PackedTensorAccessor32<scalar_t,5,torch::RestrictPtrTraits> volume_grad,
+  int r)
+{
+  // batch index
+  const int x = blockIdx.x * blockDim.x + threadIdx.x;
+  const int y = blockIdx.y * blockDim.y + threadIdx.y;
+  const int n = blockIdx.z;
+
+  const int h1 = volume_grad.size(1);
+  const int w1 = volume_grad.size(2);
+  const int h2 = volume_grad.size(3);
+  const int w2 = volume_grad.size(4);
+
+  if (!within_bounds(y, x, h1, w1)) {
+    return;
+  }
+
+  float x0 = coords[n][0][y][x];
+  float y0 = coords[n][1][y][x];
+
+  float dx = x0 - floor(x0);
+  float dy = y0 - floor(y0);
+
+  int rd = 2*r + 1;
+  for (int i=0; i<rd+1; i++) {
+    for (int j=0; j<rd+1; j++) {
+      int x1 = static_cast<int>(floor(x0)) - r + i;
+      int y1 = static_cast<int>(floor(y0)) - r + j;
+
+      if (within_bounds(y1, x1, h2, w2)) {
+        scalar_t g = 0.0;
+        if (i > 0 && j > 0)
+          g += corr_grad[n][i-1][j-1][y][x] * scalar_t(dx * dy);
+
+        if (i > 0 && j < rd)
+          g += corr_grad[n][i-1][j][y][x] * scalar_t(dx * (1.0f-dy));
+
+        if (i < rd && j > 0)
+          g += corr_grad[n][i][j-1][y][x] * scalar_t((1.0f-dx) * dy);
+
+        if (i < rd && j < rd)
+          g += corr_grad[n][i][j][y][x] * scalar_t((1.0f-dx) * (1.0f-dy));
+
+        volume_grad[n][y][x][y1][x1] += g;
+      }
+    }
+  }
+}
+
+std::vector<torch::Tensor> corr_index_cuda_forward(
+  torch::Tensor volume,
+  torch::Tensor coords,
+  int radius)
+{
+  const auto batch_size = volume.size(0);
+  const auto ht = volume.size(1);
+  const auto wd = volume.size(2);
+
+  const dim3 blocks((wd + BLOCK - 1) / BLOCK,
+                    (ht + BLOCK - 1) / BLOCK,
+                    batch_size);
+
+  const dim3 threads(BLOCK, BLOCK);
+
+  auto opts = volume.options();
+  torch::Tensor corr = torch::zeros(
+    {batch_size, 2*radius+1, 2*radius+1, ht, wd}, opts);
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.type(), "sampler_forward_kernel", ([&] {
+    corr_index_forward_kernel<scalar_t><<<blocks, threads>>>(
+      volume.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      coords.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+      corr.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      radius);
+  }));
+
+  return {corr};
+
+}
+
+std::vector<torch::Tensor> corr_index_cuda_backward(
+  torch::Tensor volume,
+  torch::Tensor coords,
+  torch::Tensor corr_grad,
+  int radius)
+{
+  const auto batch_size = volume.size(0);
+  const auto ht = volume.size(1);
+  const auto wd = volume.size(2);
+
+  auto volume_grad = torch::zeros_like(volume);
+
+  const dim3 blocks((wd + BLOCK - 1) / BLOCK,
+                    (ht + BLOCK - 1) / BLOCK,
+                    batch_size);
+
+  const dim3 threads(BLOCK, BLOCK);
+
+
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(volume.type(), "sampler_backward_kernel", ([&] {
+    corr_index_backward_kernel<scalar_t><<<blocks, threads>>>(
+      coords.packed_accessor32<float,4,torch::RestrictPtrTraits>(),
+      corr_grad.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      volume_grad.packed_accessor32<scalar_t,5,torch::RestrictPtrTraits>(),
+      radius);
+  }));
+
+  return {volume_grad};
+}