"""Cloud-friendly Q/A preference rater for **Hugging Face Spaces**.

Ratings are stored in (and loaded from) a lightweight **dataset repo** on the
Hugging Face Hub instead of the Space's ephemeral file system:

* The dataset repo is set via the ``RATINGS_REPO`` env-var.
* You must pass a write-enabled token (env-var ``HF_TOKEN``) that has
  ``write`` permission on that dataset.

Quick setup guide
-----------------
1. Create a dataset repository to hold the ratings file, e.g.:
   https://huggingface.co/datasets/<user>/qa-rater-data
2. Inside **Space Settings / Secrets**, add:
   * ``RATINGS_REPO`` -> <user>/qa-rater-data
   * ``HF_TOKEN``     -> a token with *Write* access to that repo
3. Add ``huggingface-hub`` to your ``requirements.txt`` (or
   ``pip install huggingface-hub`` locally).
4. Deploy / push your updated Space - ratings will now persist in the
   dataset repo instead of the Space's ephemeral storage.
"""

import hashlib
import os
import tempfile
from datetime import datetime, timezone
from pathlib import Path

import datasets
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

# -----------------------------------------------------------------------------
# Configuration – constants & styling
# -----------------------------------------------------------------------------
DATA_PATH = "human_judgement/selected_samples.json"  # currently unused here
RATINGS_FILE = (
    "human_judgement_irish_grammatical_test.csv"  # Name *inside* the dataset repo
)
# RATINGS_REPO = os.getenv("RATINGS_REPO")  # e.g. "org/qa-rater-data"
RATINGS_REPO = None
HF_TOKEN = os.getenv("HF_TOKEN")  # write token for that repo
MAX_HEIGHT_PX = 400  # Max visible height for answer Markdown blocks

api = HfApi(token=HF_TOKEN) if HF_TOKEN else None

# Full schema of the ratings CSV; older files may lack the newer columns and
# are backfilled in load_ratings().
_RATING_COLUMNS = [
    "user_id",
    "user_bucket",
    "row_index",
    "choice",
    "timestamp",
    "proficiency",
    "is_native",
    "studied_second_level",
    "studied_third_level",
    "uses_for_work",
    "usage_frequency",
]

# Columns added after the first deployment – ensured on load for old CSVs.
_BACKFILL_COLUMNS = [
    "proficiency",
    "is_native",
    "studied_second_level",
    "studied_third_level",
    "uses_for_work",
    "usage_frequency",
    "user_bucket",
]


# -----------------------------------------------------------------------------
# Helper functions – data I/O
# -----------------------------------------------------------------------------
def user_bucket(user_id: str, buckets: int = 10) -> int:
    """Deterministically map *user_id* to a bucket in ``1..buckets``."""
    digest = hashlib.sha256(user_id.encode("utf-8")).hexdigest()
    return (int(digest, 16) % buckets) + 1


def load_data(user_id: str) -> pd.DataFrame:
    """Load the evaluation dataset for *user_id*.

    Every user currently rates the full ``train`` split; *user_id* stays in
    the signature so a per-user split scheme (via ``user_bucket``) can be
    reinstated without touching callers.

    Raises:
        ValueError: if the dataset lacks the required columns.
    """
    ds = datasets.load_dataset("tktung/irish_grammar_test", split="train")
    df = pd.DataFrame(ds)
    required = {"question", "response1", "response2"}
    if not required.issubset(df.columns):
        # sorted() keeps the error message deterministic (sets are unordered).
        raise ValueError(f"Dataset must contain columns: {', '.join(sorted(required))}")
    return df


# ---------- Rating persistence helpers ---------------------------------------
def _download_remote_ratings() -> Path | None:
    """Try to fetch the current ratings file from the Hub; returns path or None."""
    if not RATINGS_REPO:
        return None
    try:
        return Path(
            hf_hub_download(
                repo_id=RATINGS_REPO,
                filename=RATINGS_FILE,
                repo_type="dataset",
                token=HF_TOKEN,
                cache_dir=tempfile.gettempdir(),
            )
        )
    except Exception:
        # File/repo may not exist yet – caller will create empty DF.
        return None


def load_ratings() -> pd.DataFrame:
    """Return ratings DataFrame from remote repo (or empty if none)."""
    remote = _download_remote_ratings()
    if remote and remote.exists():
        df = pd.read_csv(remote)
    elif os.path.exists(RATINGS_FILE):
        # Running locally (dev) – load local file if present.
        df = pd.read_csv(RATINGS_FILE)
    else:
        df = pd.DataFrame(columns=_RATING_COLUMNS)
    # Backward compatibility: ensure newer columns exist on old CSVs.
    for col in _BACKFILL_COLUMNS:
        if col not in df.columns:
            df[col] = pd.NA
    return df


def _upload_remote_ratings(df: pd.DataFrame) -> None:
    """Upload the CSV to the dataset repo (one commit per save).

    Without a configured repo/token, falls back to writing a local CSV so
    development runs still persist something inspectable.
    """
    if not (RATINGS_REPO and api):
        # Running locally (dev) – save to a local file for inspection.
        df.to_csv(RATINGS_FILE, index=False)
        return
    with tempfile.TemporaryDirectory() as tmpdir:
        csv_path = Path(tmpdir) / RATINGS_FILE
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(csv_path, index=False)
        api.upload_file(
            path_or_fileobj=str(csv_path),
            path_in_repo=RATINGS_FILE,
            repo_id=RATINGS_REPO,
            repo_type="dataset",
            commit_message="Add/Update rating",
        )


def save_rating(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    row_index: int,
    choice: int,
) -> None:
    """Append a rating (deduplicated per user/row) and push to the Hub.

    Demographic answers are stored alongside each rating, normalised to
    stripped lower-case strings.
    """
    ratings = load_ratings()
    duplicate = (ratings.user_id == user_id) & (ratings.row_index == row_index)
    if duplicate.any():
        return  # first answer wins – never overwrite an existing rating

    def norm(value) -> str:
        """Normalise a radio-button answer for storage (None-safe)."""
        return (value or "").strip().lower()

    new_entry = {
        "user_id": user_id,
        "user_bucket": user_bucket(user_id),
        "proficiency": norm(proficiency),
        "is_native": norm(is_native),
        "studied_second_level": norm(studied_second_level),
        "studied_third_level": norm(studied_third_level),
        "uses_for_work": norm(uses_for_work),
        "usage_frequency": norm(usage_frequency),
        "row_index": row_index,
        "choice": choice,
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # ISO string now carries an explicit +00:00 offset.
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    ratings = pd.concat([ratings, pd.DataFrame([new_entry])], ignore_index=True)
    _upload_remote_ratings(ratings)


def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
    """Return ``(index, question, response1, response2)`` for the next row
    this user has not rated, or ``None`` when everything is rated."""
    rated = ratings.loc[ratings.user_id == user_id, "row_index"].tolist()
    unrated = df[~df.index.isin(rated)]
    if unrated.empty:
        return None
    row = unrated.iloc[0]
    return row.name, row.question, row.response1, row.response2


def user_progress(user_id: str, state_df) -> str:
    """Return progress string for this user."""
    if not isinstance(state_df, pd.DataFrame):
        return "Progress: 0 / 0"
    ratings = load_ratings()
    rated = ratings.loc[ratings.user_id == user_id, "row_index"].nunique()
    total = len(state_df)
    if total == 0:
        return "Progress: 0 / 0"
    return f"Progress: {rated} / {total} ({rated/total:.1%})"


# -----------------------------------------------------------------------------
# Gradio callbacks
# -----------------------------------------------------------------------------
def _form_response(user_id: str, state_df, progress: str, message: str):
    """Build the 10-tuple that keeps the user on the sign-in form.

    Hides the evaluation column and submit button, clears the item widgets
    and shows *message* in the info area.
    """
    return (
        gr.update(value=user_id, visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        "",
        "",
        "",
        "",
        state_df,
        progress,
        message,
    )


def start_or_resume(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    consent: bool,
    state_df,  # may be None before first load
):
    """Validate the sign-in form and reveal the first unrated item.

    Returns the 10-tuple wired to ``start_btn.click`` outputs:
    (id_input, eval_col, submit_btn, question, answer1, answer2,
    row_idx, dataset state, progress, info message).
    """
    # If dataset not yet loaded for this session, load user-specific split.
    if not isinstance(state_df, pd.DataFrame):
        try:
            state_df = load_data(user_id)
        except Exception as e:
            progress = user_progress(user_id, state_df)
            return _form_response(
                user_id, state_df, progress, f"Dataset load failed: {e}"
            )

    progress = user_progress(user_id, state_df)

    # ---- form validation ----------------------------------------------------
    if not user_id.strip():
        return _form_response(
            user_id, state_df, progress,
            "Please enter a non-empty identifier to begin.",
        )
    if proficiency not in {"expert", "fluent", "basic"}:
        return _form_response(
            user_id, state_df, progress, "Please select your language proficiency."
        )
    # (value, label) pairs – a dict keyed by the answers would silently merge
    # fields whose answers are equal (e.g. two unset radios) and report the
    # wrong label.
    required_yes_no = [
        (is_native, "Is Native?"),
        (studied_second_level, "Studied Irish At Second Level?"),
        (studied_third_level, "Studied Irish At Third Level?"),
        (uses_for_work, "Use Irish for work?"),
    ]
    for val, label in required_yes_no:
        if val not in {"Yes", "No"}:
            return _form_response(
                user_id, state_df, progress, f"Please answer: {label}"
            )
    if usage_frequency not in {"daily", "weekly", "monthly", "yearly"}:
        return _form_response(
            user_id, state_df, progress, "Please select usage frequency."
        )
    if not consent:
        return _form_response(
            user_id, state_df, progress, "Please provide consent to proceed."
        )

    ratings = load_ratings()
    record = get_next_unrated(state_df, ratings, user_id)
    if record is None:
        progress = user_progress(user_id, state_df)
        return _form_response(
            user_id, state_df, progress,
            "🎉 You have evaluated every item – thank you!",
        )

    idx, q, a1, a2 = record
    progress = user_progress(user_id, state_df)
    return (
        gr.update(value=user_id, visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
        "**" + q + "**",
        a1,
        a2,
        str(idx),
        state_df,
        progress,
        "",
    )


def submit_preference(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    row_idx_str: str,
    choice: str,
    state_df,
):
    """Persist the user's choice and advance to the next unrated item.

    Returns the 6-tuple wired to ``submit_btn.click`` outputs:
    (question, answer1, answer2, row_idx, progress, info message).
    """
    if choice not in {"Sentence 1", "Sentence 2"}:
        progress = user_progress(user_id, state_df)
        # NOTE(review): this clears the displayed item while asking the user
        # to choose – kept as-is to preserve existing behavior.
        return (
            "",
            "",
            "",
            "",
            progress,
            "Please choose either Sentence 1 or Sentence 2 before submitting.",
        )
    save_rating(
        user_id,
        proficiency,
        is_native,
        studied_second_level,
        studied_third_level,
        uses_for_work,
        usage_frequency,
        int(row_idx_str),
        1 if choice == "Sentence 1" else 2,
    )
    ratings = load_ratings()
    record = get_next_unrated(state_df, ratings, user_id)
    progress = user_progress(user_id, state_df)
    if record is None:
        return "", "", "", "", progress, "🎉 You have evaluated every item – thank you!"
    idx, q, a1, a2 = record
    return "**" + q + "**", a1, a2, str(idx), progress, ""


# -----------------------------------------------------------------------------
# Build Gradio interface
# -----------------------------------------------------------------------------
def build_demo():
    """Assemble and return the Gradio Blocks app (dataset load is deferred
    until the user supplies an identifier)."""
    # CSS to constrain very tall answers (currently empty placeholder).
    overflow_css = """ """
    with gr.Blocks(title="Question/Answer Preference Rater") as demo:
        gr.HTML(overflow_css)
        gr.Markdown(
            """# Irish Grammatical Test
Enter your identifier below to start or resume.
Each sample is a pair of two sentences that varied by a grammatical feature.
You should choose the one that you think is correct.
Your progress is saved automatically so you can return at any time using the same identifier."""
        )
        state_df = gr.State(None)  # will be filled after hashing user_id
        state_row_idx = gr.State("")

        # Identifier / demographics form
        id_input = gr.Textbox(label="User Identifier", placeholder="e.g. alice")
        proficiency_radio = gr.Radio(
            ["expert", "fluent", "basic"],
            label="Language proficiency",
            info="Select your Irish language proficiency level.",
        )
        is_native_radio = gr.Radio(
            ["Yes", "No"], label="Is Native?", info="Are you a native Irish speaker?"
        )
        studied_second_radio = gr.Radio(
            ["Yes", "No"],
            label="Studied Irish At Second Level?",
            info="Did you study Irish in school?",
        )
        studied_third_radio = gr.Radio(
            ["Yes", "No"],
            label="Studied Irish At Third Level?",
            info="Did you study Irish at university/third level?",
        )
        uses_for_work_radio = gr.Radio(
            ["Yes", "No"],
            label="Use Irish for work?",
            info="Do you use Irish in your job?",
        )
        usage_frequency_radio = gr.Radio(
            ["daily", "weekly", "monthly", "yearly"],
            label="How often do you use Irish?",
        )
        consent_checkbox = gr.Checkbox(
            label="I consent to the use of my responses for research purposes."
        )
        start_btn = gr.Button("Start / Resume")
        info_md = gr.Markdown("")
        progress_md = gr.Markdown("Progress: 0 / 0")

        # Evaluation widgets (hidden until the form validates)
        with gr.Column(visible=False) as eval_col:
            question_md = gr.Markdown("")
            with gr.Row():
                answer1_md = gr.Markdown(label="Sentence A", elem_classes=["answerbox"])
                answer2_md = gr.Markdown(label="Sentence B", elem_classes=["answerbox"])
            choice_radio = gr.Radio(
                ["Sentence 1", "Sentence 2"],
                label="Which sentence is more grammatically correct?",
            )
        submit_btn = gr.Button("Submit Preference", visible=False)

        # Callbacks wiring
        start_btn.click(
            fn=start_or_resume,
            inputs=[
                id_input,
                proficiency_radio,
                is_native_radio,
                studied_second_radio,
                studied_third_radio,
                uses_for_work_radio,
                usage_frequency_radio,
                consent_checkbox,
                state_df,
            ],
            outputs=[
                id_input,
                eval_col,
                submit_btn,
                question_md,
                answer1_md,
                answer2_md,
                state_row_idx,
                state_df,
                progress_md,
                info_md,
            ],
        )
        submit_btn.click(
            fn=submit_preference,
            inputs=[
                id_input,
                proficiency_radio,
                is_native_radio,
                studied_second_radio,
                studied_third_radio,
                uses_for_work_radio,
                usage_frequency_radio,
                state_row_idx,
                choice_radio,
                state_df,
            ],
            outputs=[
                question_md,
                answer1_md,
                answer2_md,
                state_row_idx,
                progress_md,
                info_md,
            ],
        )
    return demo


if __name__ == "__main__":
    build_demo().launch()