infinite token limit
This commit is contained in:
parent
bcdb817e0f
commit
25f1a6ec9e
@ -14,9 +14,6 @@ from config import Config, read_config
|
|||||||
from secret_stream_socket import ProtocolError, SecretStreamSocket, wrap_connection_socket
|
from secret_stream_socket import ProtocolError, SecretStreamSocket, wrap_connection_socket
|
||||||
|
|
||||||
|
|
||||||
MAX_NEW_TOKENS = 256
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class MessagePiece:
|
class MessagePiece:
|
||||||
piece: str = ""
|
piece: str = ""
|
||||||
@ -119,8 +116,9 @@ def generate_llm_pieces(bundle: ModelBundle, messages: list) -> Iterable[str]:
|
|||||||
eos_token_ids = set(int(x) for x in eos_token_id)
|
eos_token_ids = set(int(x) for x in eos_token_id)
|
||||||
else:
|
else:
|
||||||
eos_token_ids = {int(eos_token_id)}
|
eos_token_ids = {int(eos_token_id)}
|
||||||
|
print(f"[debug] eos_token_ids={sorted(eos_token_ids)}", flush=True)
|
||||||
|
|
||||||
for _ in range(MAX_NEW_TOKENS):
|
while True:
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
outputs = bundle.model(
|
outputs = bundle.model(
|
||||||
input_ids=input_ids,
|
input_ids=input_ids,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user