# generate_text.py - Improved text generation with advanced sampling
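"""Generate text from a trained NeoMini checkpoint.

Supports temperature, top-k, and top-p (nucleus) sampling with a repetition
penalty, a preset-comparison mode, and an interactive prompt loop.
"""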
import torch
from transformers import AutoTokenizer
from model_neo import NeoMini, NeoMiniConfig
import json
import os
from pathlib import Path
def load_model(checkpoint_path="checkpoints/extended_context_model.pt"):
    """Load trained model and tokenizer"""
    print(f"Loading model from {checkpoint_path}...")

    # Check if checkpoint exists
    if not os.path.exists(checkpoint_path):
        print(f"Error: Checkpoint not found at {checkpoint_path}")
        print("Available checkpoints:")
        checkpoint_dir = Path("checkpoints")
        if checkpoint_dir.exists():
            for ckpt in sorted(checkpoint_dir.glob("checkpoint_step_*.pt")):
                print(f"  - {ckpt}")
        return None, None

    # Load checkpoint onto the available device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # Create model with same config
    config = NeoMiniConfig()
    model = NeoMini(config)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # Move to GPU if available
    model = model.to(device)
    print(f"Model loaded on {device}")

    # Load tokenizer
    tokenizer_path = "data/tokenizer"
    if os.path.exists(tokenizer_path):
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    else:
        print("Using GPT-2 tokenizer as fallback...")
        tokenizer = AutoTokenizer.from_pretrained("gpt2")

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print(f"Tokenizer vocab size: {tokenizer.vocab_size}")
    print(f"Model parameters: {model.get_num_params():,}")
    return model, tokenizer
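# Note: load_model() assumes the checkpoint is a dict saved with a
# 'model_state_dict' key and that NeoMiniConfig() matches the trained
# architecture; a mismatched config will fail in load_state_dict().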
def generate_text(model, tokenizer, prompt, max_length=100,
                  temperature=0.7,          # Lower = more focused
                  top_k=50,                 # Only consider top 50 tokens
                  top_p=0.9,                # Nucleus sampling
                  repetition_penalty=1.1):  # Penalize repetition
    """Generate text with advanced sampling techniques"""
    device = next(model.parameters()).device
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    original_length = input_ids.size(1)

    print(f"Generating with: temp={temperature}, top_k={top_k}, top_p={top_p}")

    with torch.no_grad():
        for step in range(max_length):
            # Forward pass; keep only the logits for the last position
            logits = model(input_ids)
            next_token_logits = logits[0, -1, :] / temperature

            # Apply repetition penalty (CTRL-style): make every token that
            # already appears in the sequence less likely to be sampled again
            if repetition_penalty != 1.0:
                for token_id in set(input_ids[0].tolist()):
                    if next_token_logits[token_id] < 0:
                        next_token_logits[token_id] *= repetition_penalty
                    else:
                        next_token_logits[token_id] /= repetition_penalty

            # Top-k filtering: mask everything below the k-th largest logit
            if top_k > 0:
                k = min(top_k, next_token_logits.size(-1))
                top_k_logits, _ = torch.topk(next_token_logits, k)
                min_top_k = top_k_logits[-1]
                next_token_logits[next_token_logits < min_top_k] = float('-inf')

            # Top-p (nucleus) sampling
            if top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)

                # Remove tokens with cumulative probability above the threshold;
                # shift the mask right by one so the first token that crosses
                # the threshold (and thus at least one token) is always kept
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0

                # Convert back to original vocabulary indices
                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                next_token_logits[indices_to_remove] = float('-inf')

            # Sample next token from the filtered distribution
            probs = torch.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)

            # Append to sequence
            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)

            # Check for EOS token
            if next_token.item() == tokenizer.eos_token_id:
                print(f"  → Stopped at EOS token (step {step+1})")
                break

            # Check for max context length (model's max is 1024 tokens)
            if input_ids.size(1) >= 1024:
                print(f"  → Stopped at max context length (step {step+1})")
                break

    return tokenizer.decode(input_ids[0], skip_special_tokens=True)
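# Standalone usage sketch (assumes load_model() can find the default checkpoint):
#   model, tokenizer = load_model()
#   text = generate_text(model, tokenizer, "Scientists have discovered",
#                        max_length=60, temperature=0.7, top_k=50, top_p=0.9)
#   print(text)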
def compare_generation_settings(model, tokenizer, prompt):
    """Compare different generation settings"""
    print(f"\n{'='*80}")
    print("COMPARING GENERATION SETTINGS")
    print(f"Prompt: '{prompt}'")
    print(f"{'='*80}")

    settings = [
        {"name": "Conservative", "temp": 0.5, "top_k": 20, "top_p": 0.8},
        {"name": "Balanced", "temp": 0.7, "top_k": 50, "top_p": 0.9},
        {"name": "Creative", "temp": 0.9, "top_k": 100, "top_p": 0.95},
        {"name": "Focused", "temp": 0.3, "top_k": 10, "top_p": 0.7},
    ]

    for setting in settings:
        print(f"\n--- {setting['name']} Generation ---")
        generated = generate_text(
            model, tokenizer, prompt, max_length=80,
            temperature=setting['temp'],
            top_k=setting['top_k'],
            top_p=setting['top_p']
        )
        # Only show the generated part (after prompt)
        generated_only = generated[len(prompt):].strip()
        print(f"Output: {generated_only}")
def interactive_mode(model, tokenizer):
    """Interactive text generation"""
    print(f"\n{'='*60}")
    print("INTERACTIVE MODE - Enter prompts (or 'quit' to exit)")
    print(f"{'='*60}")

    while True:
        try:
            prompt = input("\nEnter your prompt: ").strip()
            if prompt.lower() in ['quit', 'exit', 'q']:
                break
            if not prompt:
                continue

            # Get generation parameters
            try:
                temp = float(input("Temperature (0.1-1.5, default 0.7): ") or "0.7")
                top_k = int(input("Top-K (1-100, default 50): ") or "50")
                top_p = float(input("Top-P (0.1-1.0, default 0.9): ") or "0.9")
                max_len = int(input("Max length (10-200, default 100): ") or "100")
            except ValueError:
                print("Using default parameters...")
                temp, top_k, top_p, max_len = 0.7, 50, 0.9, 100

            print("\nGenerating...")
            generated = generate_text(
                model, tokenizer, prompt,
                max_length=max_len, temperature=temp,
                top_k=top_k, top_p=top_p
            )

            print(f"\nFull Output:\n{'-'*40}")
            print(generated)
            print(f"{'-'*40}")

        except KeyboardInterrupt:
            break

    print("\nExiting interactive mode...")
def main():
    print("MAP-NEO Mini Text Generator")
    print("=" * 50)

    # Load model
    model, tokenizer = load_model()
    if model is None or tokenizer is None:
        print("Failed to load model. Exiting.")
        return

    # Test prompts
    test_prompts = [
        "The future of artificial intelligence",
        "In a world where technology",
        "Scientists have discovered",
        "The key to success is",
        "Climate change is",
        "The importance of education",
        "Once upon a time, there was",
        "To solve this problem, we need to"
    ]

    print(f"\n{'='*60}")
    print("BASIC GENERATION TEST")
    print(f"{'='*60}")

    # Test basic generation
    for i, prompt in enumerate(test_prompts[:3], 1):
        print(f"\n--- Test {i}/3 ---")
        print(f"Prompt: {prompt}")
        print("-" * 50)
        generated = generate_text(
            model, tokenizer, prompt,
            max_length=80, temperature=0.7,
            top_k=50, top_p=0.9
        )
        # Show only generated part
        generated_only = generated[len(prompt):].strip()
        print(f"Generated: {generated_only}")

    # Compare settings
    compare_generation_settings(
        model, tokenizer,
        "The most important discovery in science was"
    )

    # Interactive mode
    print(f"\n{'='*60}")
    choice = input("Start interactive mode? (y/n): ").lower().strip()
    if choice in ['y', 'yes']:
        interactive_mode(model, tokenizer)

    print("\nText generation complete!")


if __name__ == "__main__":
    main()
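# Running this file directly loads the default checkpoint, prints a few sample
# generations, compares the sampling presets, and optionally enters interactive mode:
#   python generate_text.py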