Fixed some typos in comments

2023-04-07 00:50:18 +08:00
parent aac76a1fb0
commit 20114f5138
8 changed files with 11 additions and 11 deletions
--- a/segment_anything/modeling/image_encoder.py
+++ b/segment_anything/modeling/image_encoder.py
@@ -198,7 +198,7 @@ class Attention(nn.Module):
        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of attention heads.
-            qkv_bias (bool:  If True, add a learnable bias to query, key, value.
+            qkv_bias (bool):  If True, add a learnable bias to query, key, value.
            rel_pos (bool): If True, add relative positional embeddings to the attention map.
            rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
            input_size (int or None): Input resolution for calculating the relative positional
@@ -270,7 +270,7 @@ def window_unpartition(
    """
    Window unpartition into original sequences and removing padding.
    Args:
-        x (tensor): input tokens with [B * num_windows, window_size, window_size, C].
+        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
        window_size (int): window size.
        pad_hw (Tuple): padded height and width (Hp, Wp).
        hw (Tuple): original height and width (H, W) before padding.
--- a/segment_anything/modeling/sam.py
+++ b/segment_anything/modeling/sam.py
@@ -85,8 +85,8 @@ class Sam(nn.Module):
          (list(dict)): A list over input images, where each element is
            as dictionary with the following keys.
              'masks': (torch.Tensor) Batched binary mask predictions,
-                with shape BxCxHxW, where B is the number of input promts,
-                C is determiend by multimask_output, and (H, W) is the
+                with shape BxCxHxW, where B is the number of input prompts,
+                C is determined by multimask_output, and (H, W) is the
                original size of the image.
              'iou_predictions': (torch.Tensor) The model's predictions
                of mask quality, in shape BxC.
--- a/segment_anything/modeling/transformer.py
+++ b/segment_anything/modeling/transformer.py
@@ -96,7 +96,7 @@ class TwoWayTransformer(nn.Module):
                key_pe=image_pe,
            )

-        # Apply the final attenion layer from the points to the image
+        # Apply the final attention layer from the points to the image
        q = queries + point_embedding
        k = keys + image_pe
        attn_out = self.final_attn_token_to_image(q=q, k=k, v=keys)