# Source: sglang0.4.5.post1/python/sglang/srt/layers/pooler.py (51 lines, 1.6 KiB, Python)

# adapted from
# https://github.com/vllm-project/vllm/blob/82a1b1a82b1fbb454c82a9ef95730b929c9b270c/vllm/model_executor/layers/pooler.py
from dataclasses import dataclass
from enum import IntEnum
import torch
import torch.nn as nn
from sglang.srt.model_executor.model_runner import ForwardBatch
class PoolingType(IntEnum):
    """Pooling strategies that a `Pooler` can be configured with."""

    # Pool by taking the hidden state at the last token index of each sequence.
    LAST = 0
@dataclass
class EmbeddingPoolerOutput:
    """Container for the embeddings produced by a pooling layer.

    Attributes:
        embeddings: The pooled (and possibly normalized) tensor.
    """

    embeddings: torch.Tensor
class Pooler(nn.Module):
    """A layer that pools specific information from hidden states.

    This layer does the following:
    1. Extracts specific tokens based on the pooling method.
    2. Normalizes the output if specified.
    3. Returns structured results as `EmbeddingPoolerOutput`.

    Attributes:
        pooling_type: The type of pooling to use. Only `PoolingType.LAST` is
            implemented; any other value makes `forward` raise `ValueError`.
        normalize: Whether to L2-normalize the pooled data.
    """

    def __init__(self, pooling_type: PoolingType, normalize: bool):
        """Store the pooling configuration.

        Args:
            pooling_type: Pooling strategy applied in `forward`.
            normalize: If true, `forward` L2-normalizes the pooled rows.
        """
        super().__init__()
        self.pooling_type = pooling_type
        self.normalize = normalize

    def forward(
        self, hidden_states: torch.Tensor, forward_batch: ForwardBatch
    ) -> EmbeddingPoolerOutput:
        """Pool `hidden_states` into one row per entry of `extend_seq_lens`.

        Args:
            hidden_states: Tensor indexed along dim 0 by token position.
                NOTE(review): presumably (total_tokens, hidden_size) with the
                batch's sequences packed back to back -- confirm with caller.
            forward_batch: Batch metadata; only `extend_seq_lens` is read.

        Returns:
            An `EmbeddingPoolerOutput` wrapping the pooled tensor, normalized
            along dim 1 when `self.normalize` is set.

        Raises:
            ValueError: If `self.pooling_type` is not `PoolingType.LAST`.
        """
        if self.pooling_type == PoolingType.LAST:
            # The running sum of the lengths is the end offset of each
            # sequence; subtracting 1 turns it into the index of that
            # sequence's final token (assumes sequences are stored
            # contiguously in `extend_seq_lens` order -- TODO confirm).
            last_token_indices = torch.cumsum(forward_batch.extend_seq_lens, dim=0) - 1
            pooled_data = hidden_states[last_token_indices]
        else:
            raise ValueError(f"Invalid pooling type: {self.pooling_type}")

        if self.normalize:
            # L2 norm over dim 1, i.e. each pooled row is scaled to unit length.
            pooled_data = nn.functional.normalize(pooled_data, p=2, dim=1)

        return EmbeddingPoolerOutput(embeddings=pooled_data)