Source code for deeptab.configs.models.autoint_config
from collections.abc import Callable
from dataclasses import dataclass, field
import torch.nn as nn
from deeptab.nn.blocks.transformer import ReGLU
from ..core import BaseModelConfig
[docs]
@dataclass
class AutoIntConfig(BaseModelConfig):
    """Architecture hyperparameters for AutoInt models (DeepTab 2.0 API).

    This configuration carries architecture settings only. Every field
    declared here has a default value.

    Parameters
    ----------
    d_model : int, default=128
        Dimensionality of the transformer model. Declared here to override
        the default inherited from the parent configuration.
    n_layers : int, default=4
        Number of transformer layers.
    n_heads : int, default=8
        Number of attention heads used by the transformer.
    attn_dropout : float, default=0.2
        Dropout rate applied in the attention mechanism.
    transformer_dim_feedforward : int, default=256
        Dimensionality of the transformer's feed-forward layers.
    fprenorm : bool, default=False
        If True, pre-normalization is applied in the attention layers.
    bias : bool, default=True
        If True, linear layers use a bias term.
    use_cls : bool, default=False
        If True, a CLS token is used for pooling; otherwise pooling is done
        by averaging.
    kv_compression : float, default=0.5
        Compression ratio for key-value pairs.
    kv_compression_sharing : str, default='key-value'
        Sharing strategy for key-value compression, either ``'headwise'``
        or ``'key-value'``.
    """

    # --- Default overridden from the parent configuration ---
    d_model: int = 128

    # --- Transformer stack ---
    n_layers: int = 4
    n_heads: int = 8
    attn_dropout: float = 0.2
    transformer_dim_feedforward: int = 256

    # --- Attention / linear layer switches ---
    fprenorm: bool = False
    bias: bool = True
    use_cls: bool = False

    # --- Key-value compression ---
    kv_compression: float = 0.5
    kv_compression_sharing: str = "key-value"