Source code for pytorch3d.implicitron.models.view_pooler.view_pooler

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

from typing import Dict, List, Optional, Union

import torch
from pytorch3d.implicitron.tools.config import Configurable, run_auto_creation
from pytorch3d.renderer.cameras import CamerasBase

from .feature_aggregator import FeatureAggregatorBase
from .view_sampler import ViewSampler


# pyre-ignore: 13
[docs] class ViewPooler(Configurable, torch.nn.Module): """ Implements sampling of image-based features at the 2d projections of a set of 3D points, and a subsequent aggregation of the resulting set of features per-point. Args: view_sampler: An instance of ViewSampler which is used for sampling of image-based features at the 2D projections of a set of 3D points. feature_aggregator_class_type: The name of the feature aggregator class which is available in the global registry. feature_aggregator: A feature aggregator class which inherits from FeatureAggregatorBase. Typically, the aggregated features and their masks are output by a `ViewSampler` which samples feature tensors extracted from a set of source images. FeatureAggregator executes step (4) above. """ view_sampler: ViewSampler feature_aggregator_class_type: str = "AngleWeightedReductionFeatureAggregator" feature_aggregator: FeatureAggregatorBase def __post_init__(self): run_auto_creation(self)
[docs] def get_aggregated_feature_dim(self, feats: Union[Dict[str, torch.Tensor], int]): """ Returns the final dimensionality of the output aggregated features. Args: feats: Either a `dict` of sampled features `{f_i: t_i}` corresponding to the `feats_sampled` argument of `feature_aggregator,forward`, or an `int` representing the sum of dimensionalities of each `t_i`. Returns: aggregated_feature_dim: The final dimensionality of the output aggregated features. """ return self.feature_aggregator.get_aggregated_feature_dim(feats)
[docs] def has_aggregation(self): """ Specifies whether the `feature_aggregator` reduces the output `reduce_dim` dimension to 1. Returns: has_aggregation: `True` if `reduce_dim==1`, else `False`. """ return self.feature_aggregator.has_aggregation()
[docs] def forward( self, *, # force kw args pts: torch.Tensor, seq_id_pts: Union[List[int], List[str], torch.LongTensor], camera: CamerasBase, seq_id_camera: Union[List[int], List[str], torch.LongTensor], feats: Dict[str, torch.Tensor], masks: Optional[torch.Tensor], **kwargs, ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: """ Project each point cloud from a batch of point clouds to corresponding input cameras, sample features at the 2D projection locations in a batch of source images, and aggregate the pointwise sampled features. Args: pts: A tensor of shape `[pts_batch x n_pts x 3]` in world coords. seq_id_pts: LongTensor of shape `[pts_batch]` denoting the ids of the scenes from which `pts` were extracted, or a list of string names. camera: 'n_cameras' cameras, each coresponding to a batch element of `feats`. seq_id_camera: LongTensor of shape `[n_cameras]` denoting the ids of the scenes corresponding to cameras in `camera`, or a list of string names. feats: a dict of tensors of per-image features `{feat_i: T_i}`. Each tensor `T_i` is of shape `[n_cameras x dim_i x H_i x W_i]`. masks: `[n_cameras x 1 x H x W]`, define valid image regions for sampling `feats`. Returns: feats_aggregated: If `feature_aggregator.concatenate_output==True`, a tensor of shape `(pts_batch, reduce_dim, n_pts, sum(dim_1, ... dim_N))` containing the aggregated features. `reduce_dim` depends on the specific feature aggregator implementation and typically equals 1 or `n_cameras`. If `feature_aggregator.concatenate_output==False`, the aggregator does not concatenate the aggregated features and returns a dictionary of per-feature aggregations `{f_i: t_i_aggregated}` instead. Each `t_i_aggregated` is of shape `(pts_batch, reduce_dim, n_pts, aggr_dim_i)`. """ # (1) Sample features and masks at the ray points sampled_feats, sampled_masks = self.view_sampler( pts=pts, seq_id_pts=seq_id_pts, camera=camera, seq_id_camera=seq_id_camera, feats=feats, masks=masks, ) # (2) Aggregate features from multiple views # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function. feats_aggregated = self.feature_aggregator( # noqa: E731 sampled_feats, sampled_masks, pts=pts, camera=camera, ) # TODO: do we need to pass a callback rather than compute here? return feats_aggregated