diff --git a/dedicated_ai_server/server.py b/dedicated_ai_server/server.py index d5ed84f..ba6e0a1 100644 --- a/dedicated_ai_server/server.py +++ b/dedicated_ai_server/server.py @@ -14,9 +14,6 @@ from config import Config, read_config from secret_stream_socket import ProtocolError, SecretStreamSocket, wrap_connection_socket -MAX_NEW_TOKENS = 256 - - @dataclass class MessagePiece: piece: str = "" @@ -119,8 +116,9 @@ def generate_llm_pieces(bundle: ModelBundle, messages: list) -> Iterable[str]: eos_token_ids = set(int(x) for x in eos_token_id) else: eos_token_ids = {int(eos_token_id)} + print(f"[debug] eos_token_ids={sorted(eos_token_ids)}", flush=True) - for _ in range(MAX_NEW_TOKENS): + while True: with torch.inference_mode(): outputs = bundle.model( input_ids=input_ids,