Commit
·
b9def7b
1
Parent(s):
f7003d5
Update docs
Browse files- app.py +1 -1
- routers/soundex.py +5 -0
- routers/tokenize.py +16 -1
app.py
CHANGED
|
@@ -26,7 +26,7 @@ app = FastAPI(
|
|
| 26 |
# },
|
| 27 |
license_info={
|
| 28 |
"name": "Apache 2.0",
|
| 29 |
-
"
|
| 30 |
},
|
| 31 |
)
|
| 32 |
|
|
|
|
| 26 |
# },
|
| 27 |
license_info={
|
| 28 |
"name": "Apache 2.0",
|
| 29 |
+
"url": "https://www.apache.org/licenses/LICENSE-2.0.html",
|
| 30 |
},
|
| 31 |
)
|
| 32 |
|
routers/soundex.py
CHANGED
|
@@ -19,5 +19,10 @@ class SoundexEngine(str, Enum):
|
|
| 19 |
def soundex(text: str, engine: SoundexEngine = "udom83"):
|
| 20 |
"""
|
| 21 |
This API converts Thai text into phonetic code.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
return {"soundex": py_soundex(text=text, engine=engine)}
|
|
|
|
| 19 |
def soundex(text: str, engine: SoundexEngine = "udom83"):
|
| 20 |
"""
|
| 21 |
This API converts Thai text into phonetic code.
|
| 22 |
+
|
| 23 |
+
## Input
|
| 24 |
+
|
| 25 |
+
- **text**: A word to convert into phonetic code.
|
| 26 |
+
- **engine**: Soundex Engine (default is udom83)
|
| 27 |
"""
|
| 28 |
return {"soundex": py_soundex(text=text, engine=engine)}
|
routers/tokenize.py
CHANGED
|
@@ -43,6 +43,11 @@ class SentTokenizeEngine(BaseModel):
|
|
| 43 |
def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
|
| 44 |
"""
|
| 45 |
Word tokenize or word segmentation for Thai language
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
"""
|
| 47 |
return {"words": py_word_tokenize(text=text, engine=engine)}
|
| 48 |
|
|
@@ -50,7 +55,12 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
|
|
| 50 |
@router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
|
| 51 |
def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
|
| 52 |
"""
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
"""
|
| 55 |
return {"subwords": py_subword_tokenize(text=text, engine=engine)}
|
| 56 |
|
|
@@ -59,5 +69,10 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
|
|
| 59 |
def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
|
| 60 |
"""
|
| 61 |
Thai sentence segmentation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
"""
|
| 63 |
return {"sents": py_sent_tokenize(text=text, engine=engine)}
|
|
|
|
| 43 |
def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
|
| 44 |
"""
|
| 45 |
Word tokenize or word segmentation for Thai language
|
| 46 |
+
|
| 47 |
+
## Input
|
| 48 |
+
|
| 49 |
+
- **text**: Text to tokenize.
|
| 50 |
+
- **engine**: Word Tokenize Engine (default is newmm)
|
| 51 |
"""
|
| 52 |
return {"words": py_word_tokenize(text=text, engine=engine)}
|
| 53 |
|
|
|
|
| 55 |
@router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
|
| 56 |
def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
|
| 57 |
"""
|
| 58 |
+
Subword tokenize or subword segmentation for Thai language
|
| 59 |
+
|
| 60 |
+
## Input
|
| 61 |
+
|
| 62 |
+
- **text**: Text to tokenize.
|
| 63 |
+
- **engine**: Subword Tokenize Engine (default is tcc)
|
| 64 |
"""
|
| 65 |
return {"subwords": py_subword_tokenize(text=text, engine=engine)}
|
| 66 |
|
|
|
|
| 69 |
def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
|
| 70 |
"""
|
| 71 |
Thai sentence segmentation
|
| 72 |
+
|
| 73 |
+
## Input
|
| 74 |
+
|
| 75 |
+
- **text**: Text to tokenize.
|
| 76 |
+
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
| 77 |
"""
|
| 78 |
return {"sents": py_sent_tokenize(text=text, engine=engine)}
|