# app/utils/district_matcher.py
"""
District Matcher - resolve misspelled Tashkent district names.

User input is normalized and compared against a table of known spelling
variants using fuzzy matching (difflib.SequenceMatcher) plus substring and
prefix bonuses.
"""

import logging
import re
from difflib import SequenceMatcher
from typing import Optional

logger = logging.getLogger(__name__)

# Tashkent districts: district ID -> known spellings and misspellings.
# All variants are lowercase because input is lowercased before comparison.
DISTRICT_VARIANTS = {
    "chilonzor": [
        "chilonzor", "chilanazor", "chillonzor", "chilanzor", "chilinzor",
        "chilanzar", "chilinzar", "chilonzar", "chilanzur",
    ],
    "yunusobod": [
        "yunusobod", "yunusabad", "yunusabod", "iunusobod",
        "yunus obod", "yunus abad", "yunusabat", "iunusabad",
    ],
    "mirzo_ulugbek": [
        "mirzo ulugbek", "mirzo ulug'bek", "mirzo ulugʻbek", "mirza ulugbek",
        "ttg", "mirzo ulug bek", "mirza ulug'bek", "ulugbek", "ulug'bek",
    ],
    "shayxontohur": [
        "shayxontohur", "shayxontoxur", "shayhontohur", "shayxantoxur",
        "sayxontohur", "sheyhontoxur", "shayxon tohur", "shayxon toxur",
        "shayx tohur", "shayx toxur",
    ],
    "yakkasaroy": [
        "yakkasaroy", "yakkasaray", "yakasaroy", "yakkosaroy", "iakkasaroy",
        "yakka saroy", "yakka saray", "yakkasarai",
    ],
    "mirobod": [
        "mirobod", "mirabod", "mirobad",
        "mir obod", "mir abad", "mirabat",
    ],
    "yashnobod": [
        "yashnobod", "yashnabad", "yeshnobod", "yashnabod", "yashnobad",
        "yash nobod", "yash nabad", "yashnabat",
    ],
    "sergeli": [
        "sergeli", "sergili", "sirgeli", "sergeley", "sirgili",
        "sergel", "sergil",
    ],
    "bektemir": [
        "bektemir", "bektemar", "bektimir", "bektamir", "bektemur",
        "bek temir", "bek tamir", "bektamur",
    ],
    "uchtepa": [
        "uchtepa", "uchtepe", "uchtipi", "uchtepo",
        "uch tepa", "uch tepe", "uchtipa",
    ],
    "olmazor": [
        "olmazor", "almazor", "olmozor",
        "olma zor", "alma zor", "olmazar",
    ],
    "yangihayot": [
        "yangihayot", "yangihayat", "yangi hayot", "yangixayot",
        "yangihoyot", "yangi xayot", "yangi hayat",
    ],
}

# District ID -> capitalized name without the "tumani" suffix.
# Single source for both the display name and the comma-separated listing.
_DISTRICT_NAMES = {
    "chilonzor": "Chilonzor",
    "yunusobod": "Yunusobod",
    "mirzo_ulugbek": "Mirzo Ulug'bek",
    "shayxontohur": "Shayxontohur",
    "yakkasaroy": "Yakkasaroy",
    "mirobod": "Mirobod",
    "yashnobod": "Yashnobod",
    "sergeli": "Sergeli",
    "bektemir": "Bektemir",
    "uchtepa": "Uchtepa",
    "olmazor": "Olmazor",
    "yangihayot": "Yangihayot",
}

# Apostrophe look-alikes (U+02BB, U+02BC, U+2018, U+2019, backtick, acute
# accent) are folded to the ASCII apostrophe used by most variants above.
_APOSTROPHE_MAP = str.maketrans({
    "\u02bb": "'",
    "\u02bc": "'",
    "\u2018": "'",
    "\u2019": "'",
    "`": "'",
    "\u00b4": "'",
})

# The word "tuman" (district) as a whole word, optionally with the possessive
# "i" and a case suffix: tuman, tumani, tumanida, tumanidan, tumaniga, ...
# Whole-word matching keeps unrelated words such as "tumanov" intact.
_DISTRICT_WORD_RE = re.compile(r"\btumani?(?:da|dan|ga|ni|ning)?\b")

_MIN_INPUT_LEN = 3       # shorter inputs are rejected as too ambiguous
_PREFIX_LEN = 4          # number of leading characters compared for the prefix bonus
_SUBSTRING_SCORE = 0.85  # floor score when one string contains the other
_PREFIX_SCORE = 0.75     # floor score when the leading characters agree


def normalize_text(text: str) -> str:
    """
    Normalize free-form text for district matching.

    Lowercases the text, folds apostrophe look-alikes to "'", removes the
    word "tuman" (with its common inflections) wherever it appears as a
    whole word, and collapses runs of whitespace into single spaces.

    Args:
        text: Raw text; a falsy value yields an empty string.

    Returns:
        The normalized text.
    """
    if not text:
        return ""

    lowered = text.lower().translate(_APOSTROPHE_MAP)
    # Replace with a space (not "") so neighbouring words are never glued.
    without_district_word = _DISTRICT_WORD_RE.sub(" ", lowered)
    return " ".join(without_district_word.split())


def similarity_score(str1: str, str2: str) -> float:
    """
    Return the similarity of two strings in the range 0.0 - 1.0.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        SequenceMatcher ratio (1.0 means the strings are identical).
    """
    return SequenceMatcher(None, str1, str2).ratio()


def _variant_score(normalized_input: str, variant: str) -> float:
    """Score one variant: fuzzy ratio, raised to a floor by substring/prefix hits."""
    score = similarity_score(normalized_input, variant)

    # Either string containing the other counts as a strong hit.
    if normalized_input in variant or variant in normalized_input:
        score = max(score, _SUBSTRING_SCORE)

    # Agreement on the leading characters counts as a weaker hit.
    if (variant.startswith(normalized_input[:_PREFIX_LEN])
            or normalized_input.startswith(variant[:_PREFIX_LEN])):
        score = max(score, _PREFIX_SCORE)

    return score


def find_district_fuzzy(user_text: str, threshold: float = 0.5) -> Optional[str]:
    """
    Resolve a possibly misspelled district name to its district ID.

    Every variant in DISTRICT_VARIANTS is scored against the normalized
    input; the first variant with the strictly highest score wins.

    NOTE(review): the substring bonus is a flat value, so a short fragment
    shared by several districts (e.g. "obod") resolves to whichever district
    comes first in DISTRICT_VARIANTS. Confirm whether callers would prefer
    None for such ambiguous input.

    Args:
        user_text: Text entered by the user, e.g. "chillonzor" or "yunusabad".
        threshold: Minimum score (0.0 - 1.0) required to accept a match.

    Returns:
        District ID (e.g. "chilonzor"), or None when the input is empty,
        shorter than 3 characters after normalization, below the threshold,
        or when an unexpected error occurs (the error is logged).
    """
    try:
        if not user_text:
            return None

        normalized_input = normalize_text(user_text)
        logger.info(
            "🏙️ Tuman qidirilmoqda: '%s' → '%s'", user_text, normalized_input
        )

        if len(normalized_input) < _MIN_INPUT_LEN:
            logger.warning("⚠️ Matn juda qisqa")
            return None

        best_match = None
        best_score = 0.0

        for district_id, variants in DISTRICT_VARIANTS.items():
            for variant in variants:
                score = _variant_score(normalized_input, variant)
                if score > best_score:
                    best_score = score
                    best_match = district_id
            # An exact hit cannot be beaten (updates require a strictly
            # higher score), so scanning further districts is pointless.
            if best_score >= 1.0:
                break

        if best_score >= threshold:
            logger.info(
                "✅ Tuman topildi: '%s' (score: %.2f)", best_match, best_score
            )
            return best_match

        logger.warning(
            "⚠️ Tuman topilmadi (best score: %.2f < %s)", best_score, threshold
        )
        return None

    except Exception as e:
        # Best-effort boundary: callers get None instead of an exception,
        # but the traceback is kept in the log for diagnosis.
        logger.exception("❌ District matching xatoligi: %s", e)
        return None


def get_district_display_name(district_id: str) -> str:
    """
    Return the full display name for a district ID.

    Args:
        district_id: District ID, e.g. "chilonzor".

    Returns:
        Display name, e.g. "Chilonzor tumani"; an unknown ID is returned
        unchanged.
    """
    name = _DISTRICT_NAMES.get(district_id)
    if name is None:
        return district_id
    return f"{name} tumani"


def list_all_districts_text() -> str:
    """
    Return all district names as one comma-separated string.

    Returns:
        "Chilonzor, Yunusobod, Mirzo Ulug'bek, ..."
    """
    return ", ".join(_DISTRICT_NAMES.values())