"""Gradio app that filters a performer dataset by hair colour, region, age and gender.

The dataset is a CSV at ``DATA_PATH`` with the columns
``id,name,hair_color_raw,country_code,dob,source_url,gender``.
Rows whose computed age is below 18 are dropped when the data is loaded.
"""

import os
from datetime import date, datetime
from typing import Optional

import gradio as gr
import numpy as np
import pandas as pd

DATA_PATH = os.getenv("DATA_PATH", "dataset.csv")

# Canonical hair colour -> keywords that map to it.
# NOTE: normalize_hair() does substring matching in this dict's order, so the
# first bucket containing a matching keyword wins (e.g. "multicolored"
# contains "red" and is therefore classified as "red").
HAIR_MAP = {
    "blonde": ["blonde", "light blonde", "dirty blonde", "fair"],
    "brown": ["brown", "dark brown", "light brown", "brunette"],
    "black": ["black", "jet black"],
    "red": ["red", "ginger", "auburn"],
    "gray": ["gray", "grey", "salt and pepper", "white"],
    "other": ["dyed", "colorful", "blue", "pink", "green", "purple", "rainbow"],
}

# Country -> broad region mapping (very coarse; adjust as needed)
REGION_MAP = {
    "europe": {
        "AL", "AD", "AM", "AT", "AZ", "BY", "BE", "BA", "BG", "HR", "CY", "CZ",
        "DK", "EE", "FI", "FR", "GE", "DE", "GR", "HU", "IS", "IE", "IT", "KZ",
        "XK", "LV", "LI", "LT", "LU", "MT", "MD", "MC", "ME", "NL", "MK", "NO",
        "PL", "PT", "RO", "RU", "SM", "RS", "SK", "SI", "ES", "SE", "CH", "TR",
        "UA", "GB", "UK",
    },
    "north america": {"US", "CA", "MX"},
    "latin america": {
        "AR", "BO", "BR", "CL", "CO", "CR", "CU", "DO", "EC", "SV", "GT", "HN",
        "NI", "PA", "PY", "PE", "PR", "UY", "VE",
    },
    "asia": {
        "CN", "JP", "KR", "IN", "BD", "PK", "ID", "PH", "VN", "TH", "MY", "SG",
        "HK", "TW", "LA", "KH", "MM", "NP", "LK", "MN",
    },
    "middle east": {
        "IL", "JO", "LB", "EG", "SA", "AE", "QA", "BH", "OM", "KW", "IR", "IQ",
        "YE", "PS", "SY",
    },
    "africa": {
        "DZ", "AO", "BJ", "BW", "BF", "BI", "CM", "CV", "CF", "TD", "KM", "CG",
        "CD", "CI", "DJ", "GQ", "ER", "ET", "GA", "GM", "GH", "GN", "GW", "KE",
        "LS", "LR", "LY", "MG", "MW", "ML", "MR", "MU", "MA", "MZ", "NA", "NE",
        "NG", "RW", "SN", "SC", "SL", "SO", "ZA", "SS", "SD", "TZ", "TG", "TN",
        "UG", "ZM", "ZW",
    },
    "oceania": {"AU", "NZ", "FJ", "PG", "WS", "TO", "SB", "TV", "VU", "NC"},
}

# Columns load_data() reads from the CSV (``id`` is expected but never used).
_REQUIRED_COLUMNS = (
    "name", "hair_color_raw", "country_code", "dob", "source_url", "gender",
)

# Columns of the table returned by search() and shown in the UI.
RESULT_COLUMNS = ["name", "hair_color", "region", "age", "country_code", "source_url"]


def normalize_region(country_code: str) -> str:
    """Map a two-letter country code to a key of ``REGION_MAP``.

    Args:
        country_code: Country code in any letter case; surrounding
            whitespace is ignored.

    Returns:
        The first region whose code set contains the code, or ``"unknown"``
        for non-strings, strings shorter than two characters, and codes that
        are not listed in ``REGION_MAP``.
    """
    if not isinstance(country_code, str):
        return "unknown"
    code = country_code.strip().upper()
    if len(code) < 2:
        return "unknown"
    return next(
        (region for region, codes in REGION_MAP.items() if code in codes),
        "unknown",
    )


def normalize_hair(h: str) -> str:
    """Reduce a free-text hair description to a canonical colour.

    Args:
        h: Raw hair description.

    Returns:
        ``"unknown"`` for non-strings and blank strings; otherwise the first
        ``HAIR_MAP`` key one of whose keywords occurs as a substring of the
        lower-cased text; otherwise ``"blonde"`` if the text contains
        ``"blond"``; otherwise the stripped, lower-cased text itself.
    """
    if not isinstance(h, str) or not h.strip():
        return "unknown"
    text = h.strip().lower()
    for colour, keywords in HAIR_MAP.items():
        if any(keyword in text for keyword in keywords):
            return colour
    # Catch spellings such as "blond" that no HAIR_MAP keyword covers.
    if "blond" in text:
        return "blonde"
    # Unrecognised descriptions are passed through unchanged; they will not
    # equal any of the dropdown's hair choices.
    return text


def compute_age(dob_str: str) -> Optional[int]:
    """Return the age in whole years for a date of birth.

    Args:
        dob_str: ``YYYY`` or an ISO-8601 date/datetime string
            (e.g. ``YYYY-MM-DD``). Surrounding whitespace is ignored.

    Returns:
        Completed years as of today, or ``None`` when the input is not a
        string or cannot be parsed. A year-only value is treated as a
        31 December birthday, so the result is the *minimum* possible age;
        this keeps the 18+ filter conservative when the exact date is unknown.
        A date of birth in the future yields a negative number.
    """
    if not isinstance(dob_str, str):
        return None
    text = dob_str.strip()
    try:
        if len(text) == 4:
            born = date(int(text), 12, 31)
        else:
            born = datetime.fromisoformat(text).date()
    except (ValueError, OverflowError):
        return None
    today = date.today()
    # Calendar comparison instead of days / 365.25, which is off by one on
    # some birthdays because of leap-day rounding.
    had_birthday = (today.month, today.day) >= (born.month, born.day)
    return today.year - born.year - (0 if had_birthday else 1)


def load_data() -> pd.DataFrame:
    """Read ``DATA_PATH`` and add the derived ``hair_color``, ``region`` and ``age`` columns.

    Returns:
        The dataset without rows whose computed age is below 18.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    # Read every column as text and treat only empty cells as missing:
    # * pandas' default NA parsing would turn the country code "NA" (Namibia)
    #   into NaN;
    # * a year-only ``dob`` column would otherwise be inferred as integers,
    #   which compute_age() rejects.
    df = pd.read_csv(DATA_PATH, dtype=str, keep_default_na=False, na_values=[""])

    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise KeyError(
            f"{DATA_PATH} is missing required column(s): {', '.join(missing)}"
        )

    # Normalised fields used for searching.
    df["hair_color"] = df["hair_color_raw"].map(normalize_hair)
    df["region"] = df["country_code"].map(normalize_region)
    # to_numeric guarantees a float column (None -> NaN) even when no date of
    # birth could be parsed; an object column of None would make the
    # comparison below raise TypeError.
    df["age"] = pd.to_numeric(df["dob"].map(compute_age), errors="coerce")

    # Drop rows with a detected age under 18.
    # NOTE(review): rows whose age could not be determined are kept. For an
    # 18+ listing consider requiring a parseable date of birth instead.
    return df[df["age"].isna() | (df["age"] >= 18)]


# Lazily populated cache of load_data(); filled on the first search.
DF = None


def _score(df, hair_color, region, amin, amax):
    """Return one relevance score per row of ``df``.

    Hair match is worth 1.0, region match 0.6, and closeness of the age to
    the midpoint of ``[amin, amax]`` up to 0.4, losing 0.02 per year of
    distance (floored at 0). Rows without an age get no age points.
    """
    midpoint = (amin + amax) / 2.0
    age_points = (
        (0.4 - (df["age"] - midpoint).abs() * 0.02).clip(lower=0.0).fillna(0.0)
    )
    hair_points = 1.0 * ((hair_color == "any") | (df["hair_color"] == hair_color))
    region_points = 0.6 * ((region == "any") | (df["region"] == region))
    return hair_points + region_points + age_points


def search(hair_color, region, age_range, top_k, is_adult, gender):
    """Return the best-matching rows for the given filters.

    Args:
        hair_color: Canonical hair colour, or ``"any"`` / empty for no filter.
        region: Region name, or ``"any"`` / empty for no filter.
        age_range: Pair ``(min_age, max_age)``, inclusive; the two bounds may
            be given in either order.
        top_k: Maximum number of rows to return (anything ``int()`` accepts).
        is_adult: Whether the user confirmed being 18+.
        gender: Lower-case gender to match, or ``"any"`` / empty for no filter.

    Returns:
        A message string when access is blocked or nothing matches; otherwise
        a DataFrame with the columns in ``RESULT_COLUMNS``, ordered by
        descending score and then by name.
    """
    if not is_adult:
        return "גישה חסומה: חובה לאשר גיל 18+ כדי לראות תוצאות."

    global DF
    if DF is None:
        DF = load_data()
    # Every filter below builds a new frame, so the cached DF is never mutated.
    df = DF

    # Optional gender filter (the UI defaults to "female").
    if gender and gender != "any":
        df = df[df["gender"].astype(str).str.strip().str.lower() == gender]

    # Filter by hair color
    if hair_color and hair_color != "any":
        df = df[df["hair_color"] == hair_color]

    # Filter by region (computed from country code)
    if region and region != "any":
        df = df[df["region"] == region]

    # Age filter; sorted() tolerates bounds supplied in the wrong order.
    # NOTE(review): rows with an unknown age pass this filter as well.
    amin, amax = sorted(age_range)
    df = df[df["age"].between(amin, amax, inclusive="both") | df["age"].isna()]

    if df.empty:
        return "לא נמצאו תוצאות בהתאמה לבקשה."

    ranked = (
        df.assign(score=_score(df, hair_color, region, amin, amax))
        .sort_values(["score", "name"], ascending=[False, True])
        .head(int(top_k))
    )

    # Return a compact table
    return ranked[RESULT_COLUMNS].reset_index(drop=True)


def _run_search(hair_color, region, age_min, age_max, top_k, is_adult, gender):
    """UI adapter around search().

    Combines the two age sliders into the ``age_range`` pair and splits the
    result into ``(table, status_message)``, because a message string cannot
    be displayed by the results table.
    """
    result = search(hair_color, region, (age_min, age_max), top_k, is_adult, gender)
    if isinstance(result, str):
        return pd.DataFrame(columns=RESULT_COLUMNS), result
    return result, ""


hair_choices = ["any", "blonde", "brown", "black", "red", "gray", "other"]
region_choices = [
    "any", "europe", "north america", "latin america", "asia", "middle east",
    "africa", "oceania",
]
gender_choices = ["any", "female", "male", "nonbinary"]

with gr.Blocks() as demo:
    gr.Markdown("## Adult Performer Finder (18+)\nבחר/י צבע שיער, אזור וטווח גילאים כדי לקבל רשימה של פרפורמרים תואמים. חובה לאשר **18+**.")
    with gr.Row():
        hair = gr.Dropdown(hair_choices, value="any", label="Hair color")
        region = gr.Dropdown(region_choices, value="any", label="Region")
        gender = gr.Dropdown(gender_choices, value="female", label="Gender (optional)")
    with gr.Row():
        # gr.Slider holds a single number, so the age range uses two sliders.
        age_min = gr.Slider(18, 80, value=21, step=1, label="Minimum age")
        age_max = gr.Slider(18, 80, value=40, step=1, label="Maximum age")
    topk = gr.Radio(choices=["3", "5", "10"], value="5", label="How many results?")
    adult = gr.Checkbox(value=False, label="I confirm I am 18+ (required)")
    btn = gr.Button("Search")
    # Shows the "access blocked" / "no results" messages returned by search().
    status = gr.Markdown()
    out = gr.Dataframe(headers=RESULT_COLUMNS, wrap=True)
    btn.click(
        fn=_run_search,
        inputs=[hair, region, age_min, age_max, topk, adult, gender],
        outputs=[out, status],
    )

if __name__ == "__main__":
    demo.launch()