# sglang0.4.5.post1/scripts/playground/router/tree.py

import time
from collections import defaultdict
from typing import Dict, List


class Node:
    """A single vertex of the radix tree: one text fragment plus its owners."""

    def __init__(self):
        # Link to the node one level up; left as None until a tree attaches it.
        self.parent = None
        # Text is stored instead of token ids because most use cases are
        # text-to-text, which avoids paying for tokenization.
        self.text: str = ""
        # Child nodes, keyed by the first character of the child's text.
        self.children: Dict[str, "Node"] = {}
        # tenant_id -> timestamp of that tenant's most recent access.
        self.tenant_last_access_time: Dict[str, float] = {}


def shared_prefix_length(s1, s2):
    """Return how many leading positions of s1 and s2 hold equal items."""
    matched = 0
    # zip stops at the shorter input, so the count never exceeds either length.
    for left, right in zip(s1, s2):
        if left != right:
            break
        matched += 1
    return matched


class MultiTenantRadixTree:
    """
    Python reference for the Rust implementation of MultiTenantRadixTree.

    MultiTenantRadixTree is the overlay of multiple radix trees owned by different tenants.
    Each node in the tree can be owned by multiple tenants, allowing for efficient storage of common prefixes
    while maintaining tenant isolation.

    Key concepts:
    - Tenant: An entity that owns a subset of the stored strings
    - Each node tracks which tenants have access to it via tenant_last_access_time
    - The tree structure is shared, but queries can be filtered by tenant_id
    """

    def __init__(self):
        """Create a tree that consists of a single, freshly constructed root node."""
        # The root starts with empty text, no children and no tenants
        # (the defaults set by Node.__init__).
        self.root = Node()

    def insert(self, s: str, tenant_id: str) -> None:
        """
        Store string 's' in the tree on behalf of 'tenant_id'.

        Every node on the path that spells 's' (the root included) gets its
        access timestamp for the tenant refreshed. Existing nodes are split
        when 's' diverges in the middle of a node's text.

        Args:
            s: The string to insert
            tenant_id: The identifier of the tenant who owns this string
        """
        node = self.root
        node.tenant_last_access_time[tenant_id] = time.time()

        pos = 0
        total = len(s)

        while pos < total:
            first_char = s[pos]
            child = node.children.get(first_char)

            if child is None:
                # Nothing starts with this character yet: hang the whole
                # remainder of 's' off the current node as a new leaf.
                leaf = Node()
                leaf.text = s[pos:]
                leaf.parent = node
                node.children[first_char] = leaf

                pos = total
                node = leaf
            else:
                common = shared_prefix_length(s[pos:], child.text)

                if common < len(child.text):
                    # 's' leaves the child's text part-way through, so split:
                    # [child] becomes [branch] -> [child with the tail only].
                    head, tail = child.text[:common], child.text[common:]

                    branch = Node()
                    branch.text = head
                    branch.children = {tail[0]: child}
                    branch.parent = node
                    node.children[head[0]] = branch
                    # Owners of the old node also own the shared head.
                    branch.tenant_last_access_time = (
                        child.tenant_last_access_time.copy()
                    )

                    child.text = tail
                    child.parent = branch

                    node = branch
                else:
                    # The child's text is fully covered: simply step into it.
                    node = child

                pos += common

            # Whichever node we ended up on now belongs to the tenant.
            node.tenant_last_access_time[tenant_id] = time.time()

    def prefix_match(self, s: str) -> tuple[str, int]:
        """
        Match string 's' with multiple tenants' trees in one operation.

        Args:
            s: The string to match

        Returns:
            Tuple(str, int): The longest prefix of 's' that matches the tree and the first tenant_id that own the matched prefix
        """
        curr = self.root
        curr_idx = 0

        ret_text = ""
        ret_tenant = None

        while curr_idx < len(s):
            matched_node = None
            if s[curr_idx] in curr.children:
                matched_node = curr.children[s[curr_idx]]

            if matched_node is None:
                break

            shared_len = shared_prefix_length(s[curr_idx:], matched_node.text)
            if shared_len == len(matched_node.text):
                curr_idx += shared_len
                curr = matched_node
            else:
                curr_idx += shared_len
                curr = matched_node
                break

        selected_tenant = list(curr.tenant_last_access_time.keys())[0]

        # traverse back to the root to update last access time for the selected tenant
        while curr != self.root:
            curr.tenant_last_access_time[selected_tenant] = time.time()
            curr = curr.parent

        return s[:curr_idx], selected_tenant

    def evict_tenant_data(self, max_size_per_tenant: Dict[str, int]) -> None:
        """
        Evict data for tenants that have exceeded their storage limits.

        Args:
            max_size_per_tenant: Dictionary mapping tenant_id to their maximum allowed storage size
        """

        def leaf_of(node):
            """
            If the node is a leaf for a tenant, add tenant_id to the return list
            This will return list of tenant ids
            If not a leaf for all tenants, return []
            """
            candidates = dict([(k, True) for k in node.tenant_last_access_time.keys()])

            for n in node.children.values():
                for c in n.tenant_last_access_time.keys():
                    candidates[c] = False

            return [k for k, v in candidates.items() if v]

        # maintain a heap with (time, tenant, node) as the value
        import heapq

        # 1. traverse the tree to
        #   a. add all the leaves into a heap (a node with N tenants will be added N times into the heap)
        #   b. calculate the used size for each tenant
        # do a dfs with stack
        stack = [self.root]
        pq = []
        used_size_per_tenant = defaultdict(int)

        while stack:
            curr = stack.pop()
            for t in curr.tenant_last_access_time.keys():
                used_size_per_tenant[t] += len(curr.text)

            for c in curr.children.values():
                stack.append(c)

            # if the node is a leaf for a tenant, add the tenant to the heap
            tenants = leaf_of(curr)
            for t in tenants:
                heapq.heappush(pq, (curr.tenant_last_access_time[t], t, curr))

        # 2. pop the heap
        #   a. if the tenant's used size is less than the limit, continue
        #   b. if the tenant's used size is greater than the limit, remove the leaf and update the used size, and add its parent to the heap
        while len(pq) > 0:
            time, tenant, node = heapq.heappop(pq)
            if used_size_per_tenant[tenant] <= max_size_per_tenant[tenant]:
                continue

            # remove the leaf
            used_size_per_tenant[tenant] -= len(node.text)
            del node.tenant_last_access_time[tenant]
            # if no children and no tenants, remove the node
            if len(node.children) == 0 and len(node.tenant_last_access_time) == 0:
                del node.parent.children[node.text[0]]

            # add its parent to the heap
            if tenant in leaf_of(node.parent):
                heapq.heappush(
                    pq,
                    (node.parent.tenant_last_access_time[tenant], tenant, node.parent),
                )

    def get_used_size_per_tenant(self) -> Dict[str, int]:
        """
        Calculate the used storage size for each tenant.

        Returns:
            Dict[str, int]: A dictionary mapping tenant_id to their used storage size
        """
        used_size_per_tenant = defaultdict(int)

        stack = [self.root]
        while stack:
            curr = stack.pop()
            for t in curr.tenant_last_access_time.keys():
                used_size_per_tenant[t] += len(curr.text)

            for c in curr.children.values():
                stack.append(c)

        return used_size_per_tenant

    def remove_tenant(self, tenant_id: str) -> None:
        """
        Remove all data associated with a specific tenant from the tree.
        This operation maintains the integrity of the shared tree structure while
        removing only the specified tenant's access information.

        Args:
            tenant_id: The identifier of the tenant whose data should be removed
        """
        # TODO: Implementation needed
        pass

    def pretty_print(self) -> str:
        """
        Returns a string representation of the tree showing the structure, tenant ownership,
        and leaf status for each node.

        Returns:
            str: A formatted string showing the tree hierarchy with tenant information
        """

        def _node_to_str(node: Node, prefix: str = "", is_last: bool = True) -> str:
            # Current node representation
            node_str = prefix
            node_str += "└── " if is_last else "├── "

            # Add node text
            node_str += f"'{node.text}' ["

            # Add tenant information including both timestamp and leaf status
            tenant_info = []
            for tid, ts in node.tenant_last_access_time.items():
                time_str = (
                    time.strftime("%H:%M:%S.", time.localtime(ts))
                    + f"{(ts % 1):0.3f}"[2:]
                )
                tenant_info.append(f"{tid} | {time_str}")

            node_str += ", ".join(tenant_info)
            node_str += "]\n"

            # Handle children
            children = list(node.children.items())
            for i, (char, child) in enumerate(children):
                is_last_child = i == len(children) - 1
                # Adjust prefix for children based on whether this is the last child
                new_prefix = prefix + ("    " if is_last else "│   ")
                node_str += _node_to_str(child, new_prefix, is_last_child)

            return node_str

        if not self.root.children:
            return "Empty tree"

        # Start with root's children since root itself is just an empty node
        result = ""
        children = list(self.root.children.items())
        for i, (char, child) in enumerate(children):
            is_last = i == len(children) - 1
            result += _node_to_str(child, "", is_last)

        return result