Commit
·
3d19e0f
1
Parent(s):
865b816
Update main.py
Browse files
main.py
CHANGED
|
@@ -91,33 +91,25 @@ async def chat(request: ChatCompletionRequest):
|
|
| 91 |
return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
|
| 92 |
|
| 93 |
async def stream_response(tokens: Any) -> None:
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
{
|
| 101 |
-
'
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
})
|
| 114 |
-
await send({
|
| 115 |
-
"type": "http.response.body",
|
| 116 |
-
"body": b"event: done\ndata: {}\n\n",
|
| 117 |
-
"more_body": False,
|
| 118 |
-
})
|
| 119 |
-
except Exception as e:
|
| 120 |
-
print(f"Exception in event publisher: {str(e)}")
|
| 121 |
|
| 122 |
async def chatV2(request: Request, body: ChatCompletionRequest):
|
| 123 |
combined_messages = ' '.join([message.content for message in body.messages])
|
|
|
|
| 91 |
return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
|
| 92 |
|
| 93 |
async def stream_response(tokens: Any) -> None:
    # NOTE(review): this is an async *generator* (it yields), so `-> None` is
    # inaccurate — the true type is AsyncGenerator[str, None]. Left unchanged
    # because the file's typing imports are not visible here; confirm and fix
    # alongside the import block.
    """Stream chat completions over server-sent events (SSE).

    Feeds *tokens* to ``llm.generate`` and, for each generated chunk,
    yields one OpenAI-style ``data: {...}\n\n`` frame containing the
    detokenized text. After the generator is exhausted, a terminal
    ``event: done`` frame is yielded so the client knows the stream ended.

    Args:
        tokens: Prompt tokens in whatever form ``llm.generate`` accepts
            (opaque here — TODO confirm against the caller).

    Yields:
        SSE-formatted ``str`` frames, ending with the ``event: done``
        sentinel frame.
    """
    try:
        iterator: Generator = llm.generate(tokens)
        for chat_chunk in iterator:
            # Detokenize once per chunk — the original called
            # llm.detokenize() twice (content + finish_reason check).
            text = llm.detokenize(chat_chunk)
            response = {
                'choices': [
                    {
                        'message': {
                            # NOTE(review): OpenAI-compatible streams use
                            # role 'assistant'; 'system' kept as-is — confirm.
                            'role': 'system',
                            'content': text,
                        },
                        # "[DONE]" is treated as the model's end-of-stream
                        # sentinel per the check in the original code.
                        'finish_reason': 'stop' if text == "[DONE]" else 'unknown',
                    }
                ]
            }
            yield f"data: {json.dumps(response)}\n\n"
        # Fix: yield the sentinel as str like every other frame — the
        # original yielded bytes here, mixing chunk types in one stream.
        yield "event: done\ndata: {}\n\n"
    except Exception as e:
        # Best-effort: log and swallow so a mid-generation failure does not
        # propagate into the ASGI server; the client sees a truncated stream.
        print(f"Exception in event publisher: {str(e)}")
|
| 112 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
async def chatV2(request: Request, body: ChatCompletionRequest):
|
| 115 |
combined_messages = ' '.join([message.content for message in body.messages])
|