import os

import google.generativeai as genai
import torch
from transformers import pipeline

# --- Configuration ---
# Using the standard Qwen 2.5 Coder 0.5B Instruct model (Native PyTorch)
REPO_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

print(f"Initializing Clarity AI Engine (Transformers)...")
print(f"Target Model: {REPO_ID}")

# Module-level singleton for the lazily loaded pipeline.
pipe = None

# Shared system prompt for BOTH the local and the Gemini backends.
# Kept in one constant so the two code paths cannot drift apart.
SYSTEM_PROMPT = (
    "You are Clarity, an intelligent coding assistant designed for students and junior developers. "
    "You were created by a team of college students (see projects.md) for a minor project to help peers write better code.\n\n"
    "Your Mission:\n"
    "1. **Review & Fix:** Correct syntax and logical errors.\n"
    "2. **Educate:** Improve variable naming (use industry standards like Google Style Guide), readability, and structure.\n"
    "3. **Optimize:** Remove redundancy and improve logic.\n"
    "4. **Be Concise:** Provide objective, short, and high-value feedback. Avoid long lectures.\n\n"
    "Guidelines:\n"
    "- **Style:** Follow the Google Style Guide for the respective language.\n"
    "- **Comments:** Add comments ONLY for complex logic or educational 'aha!' moments.\n"
    "- **Tone:** Concise, Objective, and Mentor-like.\n"
    "- **Identity:** You are 'Clarity'. If asked about your version, refer users to the GitHub repo. If asked non-code questions, answer only if factual and harmless; otherwise, politely decline.\n\n"
    "Constraint: Return ONLY the corrected code with necessary educational comments inline. Do not output a separate explanation block unless absolutely necessary for a critical concept."
)


def load_model():
    """Lazy-load the text-generation pipeline (process-wide singleton).

    Returns:
        The transformers pipeline on success, or None if loading failed.
    """
    global pipe
    if pipe is not None:
        return pipe

    print(f"Initializing Clarity AI Engine (Transformers)...")
    print(f"Target Model: {REPO_ID}")
    try:
        print("Loading model...")
        # Initialize the pipeline; device_map="auto" picks GPU when available.
        pipe = pipeline(
            "text-generation",
            model=REPO_ID,
            torch_dtype="auto",
            device_map="auto",
        )
        print("Success: Clarity AI Model loaded.")

        # Warm-up inference so the first real request does not pay
        # compilation/first-run latency.
        print("Warming up model...")
        warmup_msg = [{"role": "user", "content": "print('hello')"}]
        pipe(warmup_msg, max_new_tokens=10)
        print("Model warmup complete.")
    except Exception as e:
        print(f"CRITICAL ERROR: Failed to load model. {e}")
        pipe = None
    return pipe


def detect_language(code: str) -> dict:
    """Heuristic detection for LeetCode-supported languages.

    Checks are ordered from most to least distinctive; the first match wins.

    Args:
        code: Raw source snippet.

    Returns:
        dict with keys 'name' (display name) and 'ext' (file extension);
        falls back to {'name': 'Text', 'ext': 'txt'}.
    """
    code = code.strip()

    # C++ first, but only on C++-specific markers so that plain C
    # (which also uses #include) is not swallowed by this branch.
    if "using namespace std" in code or "std::" in code:
        return {"name": "C++", "ext": "cpp"}
    # C: printf + an include and no C++ markers.
    if "printf" in code and "#include" in code:
        return {"name": "C", "ext": "c"}
    # Remaining #include files default to C++ (original behavior).
    if "#include" in code:
        return {"name": "C++", "ext": "cpp"}

    # Java / C#
    if "public class" in code:
        if "System.out.println" in code or "public static void main" in code:
            return {"name": "Java", "ext": "java"}
    if "Console.WriteLine" in code or "namespace " in code or "using System" in code:
        return {"name": "C#", "ext": "cs"}

    # Python
    if "def " in code and ":" in code:
        return {"name": "Python", "ext": "py"}

    # JS / TS
    if "console.log" in code or "const " in code or "let " in code or "function" in code:
        if ": number" in code or ": string" in code or "interface " in code:
            return {"name": "TypeScript", "ext": "ts"}
        return {"name": "JavaScript", "ext": "js"}

    # Go
    if "package main" in code or "func main" in code or "fmt.Print" in code:
        return {"name": "Go", "ext": "go"}

    # Rust
    if "fn " in code and ("let mut" in code or "println!" in code or "Vec<" in code):
        return {"name": "Rust", "ext": "rs"}

    # PHP
    # NOTE(review): the original PHP/Swift checks were mangled (the literals
    # were stripped, leaving an always-true `"" in code` that misclassified
    # everything as Swift). Reconstructed below — confirm against repo history.
    if "<?php" in code or ("echo " in code and "$" in code):
        return {"name": "PHP", "ext": "php"}
    # Swift (return type arrow)
    if "func " in code and ("->" in code or "print(" in code):
        return {"name": "Swift", "ext": "swift"}

    # Kotlin
    if "fun " in code and ("val " in code or "var " in code) and "println(" in code:
        return {"name": "Kotlin", "ext": "kt"}

    # Dart
    if "void main()" in code and "print(" in code and ";" in code:
        return {"name": "Dart", "ext": "dart"}

    # Scala (parentheses make the original and/or precedence explicit)
    if "object " in code or "def main" in code or ("val " in code and "println" in code):
        return {"name": "Scala", "ext": "scala"}

    # Elixir
    if "defmodule" in code or "defp" in code or "IO.puts" in code or ":ok" in code:
        return {"name": "Elixir", "ext": "ex"}

    # Erlang
    if "-module" in code or "-export" in code or "io:format" in code:
        return {"name": "Erlang", "ext": "erl"}

    # Racket / Lisp
    if "(define" in code or "(lambda" in code or "#lang racket" in code:
        return {"name": "Racket", "ext": "rkt"}

    # Fallback
    return {"name": "Text", "ext": "txt"}


def _build_one_shot(lang_name: str):
    """Return (example_input, example_output) for the one-shot prompt.

    The example forces the model into the "input -> code only" pattern.
    A Python pair is used for Python snippets; a C-style pair otherwise.
    """
    if lang_name == "Python":
        return (
            "def sum(a,b): return a+b",
            "def sum(operand_a, operand_b):\n"
            "    # Descriptive names improve readability\n"
            "    return operand_a + operand_b",
        )
    return (
        "int sum(int a, int b) { return a+b; }",
        "int sum(int operand_a, int operand_b) {\n"
        "    // Descriptive names improve readability\n"
        "    return operand_a + operand_b;\n"
        "}",
    )


def _strip_markdown(text: str, drop_chatty_prefix: bool = False) -> str:
    """Strip surrounding ``` fences from a model response.

    Args:
        text: Raw model output.
        drop_chatty_prefix: Also drop a leading "Here is ..." / "Sure ..."
            line (used by the local backend).

    Returns:
        The cleaned response text.
    """
    cleaned = text.strip()
    if "```" not in cleaned:
        return cleaned
    lines = cleaned.split("\n")
    # Remove starting markdown fence
    if lines[0].strip().startswith("```"):
        lines = lines[1:]
    # Remove ending markdown fence
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    # Remove common chatty prefixes if they slipped through
    if drop_chatty_prefix and lines and (
        lines[0].lower().startswith("here is") or lines[0].lower().startswith("sure")
    ):
        lines = lines[1:]
    return "\n".join(lines).strip()


def correct_code_with_ai(code: str) -> dict:
    """Correct a buggy code snippet using the local Qwen model.

    Args:
        code: The user's (possibly buggy) source snippet.

    Returns:
        dict with 'code' (corrected source, or a '#'-prefixed error message)
        and 'language' (detect_language() result for the returned code).
    """
    detected_lang = detect_language(code)

    # Lazy load: the first request pays the model startup cost.
    current_pipe = load_model()
    if not current_pipe:
        return {
            "code": "# Model failed to load. Check server logs.",
            "language": detected_lang,
        }

    example_input, example_output = _build_one_shot(detected_lang["name"])
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": example_input},
        {"role": "assistant", "content": example_output},
        {"role": "user", "content": code},
    ]

    try:
        # BUGFIX: use the pipeline returned by load_model() rather than
        # reaching for the bare global `pipe`.
        outputs = current_pipe(
            messages,
            max_new_tokens=1024,
            temperature=0.1,  # low temperature for stricter adherence
            do_sample=True,   # required when temperature is set
        )

        # Pipeline with a message list returns [{'generated_text': [...]}];
        # 'generated_text' is the full chat history, so the last entry is
        # the assistant's new reply.
        response_content = outputs[0]["generated_text"][-1]["content"]

        cleaned_response = _strip_markdown(response_content, drop_chatty_prefix=True)

        # Run detection on the CLEAN, CORRECTED code for maximum accuracy
        detected_lang = detect_language(cleaned_response)

        return {"code": cleaned_response, "language": detected_lang}
    except Exception as e:
        print(f"Inference Error: {e}")
        return {
            "code": f"# An error occurred during processing: {str(e)}",
            "language": detected_lang,
        }


def correct_code_with_gemini(code: str, api_key: str = None, model_name: str = None) -> dict:
    """Correct a buggy code snippet using Google's Gemini API.

    Args:
        code: The user's (possibly buggy) source snippet.
        api_key: Optional explicit API key; falls back to GOOGLE_API_KEY.
        model_name: Optional model override; falls back to GOOGLE_MODEL_NAME,
            then "gemini-flash-latest".

    Returns:
        dict with 'code' and 'language', mirroring correct_code_with_ai().
    """
    detected_lang = detect_language(code)

    # 1. Resolve API Key
    final_key = api_key if api_key else os.environ.get("GOOGLE_API_KEY")
    if not final_key:
        return {
            "code": "# Error: No Google API Key provided. Please add it in Settings.",
            "language": detected_lang,
        }

    try:
        genai.configure(api_key=final_key)

        # Resolve Model Name (Default to flash-latest if not provided)
        target_model = model_name if model_name else os.environ.get(
            "GOOGLE_MODEL_NAME", "gemini-flash-latest"
        )

        model = genai.GenerativeModel(
            model_name=target_model,
            system_instruction=SYSTEM_PROMPT,
        )

        # One-shot example, seeded as chat history (user turn + model turn).
        example_input, example_output = _build_one_shot(detected_lang["name"])
        chat = model.start_chat(history=[
            {"role": "user", "parts": [example_input]},
            {"role": "model", "parts": [example_output]},
        ])

        response = chat.send_message(code)
        cleaned_response = _strip_markdown(response.text)

        return {
            "code": cleaned_response,
            "language": detect_language(cleaned_response),
        }
    except Exception as e:
        print(f"Gemini Inference Error: {e}")
        return {
            "code": f"# Gemini Error: {str(e)}",
            "language": detected_lang,
        }


def get_gemini_models(api_key: str = None) -> list:
    """Dynamically fetch the available Gemini model names.

    Args:
        api_key: Optional explicit API key; falls back to GOOGLE_API_KEY.

    Returns:
        List of model names (without the 'models/' prefix) supporting
        generateContent, 'flash' then 'pro' models first; [] on any failure.
    """
    # 1. Resolve API Key
    final_key = api_key if api_key else os.environ.get("GOOGLE_API_KEY")
    if not final_key:
        return []

    try:
        genai.configure(api_key=final_key)
        models = [
            # Strip 'models/' prefix for cleaner UI
            m.name.replace('models/', '')
            for m in genai.list_models()
            if 'generateContent' in m.supported_generation_methods
        ]
        # Sort to put 'flash' and 'pro' models at the top for better UX
        models.sort(key=lambda x: ('flash' not in x, 'pro' not in x, x))
        return models
    except Exception as e:
        print(f"Error listing models: {e}")
        return []