#!/usr/bin/env python3
"""
Japanese CLIP usage example
"""
import io

import requests
import torch
from PIL import Image
from torchvision import transforms
from transformers import AutoModel, AutoTokenizer


def preprocess_image(image, size=224):
    """Resize, tensorize, and normalize a PIL image for the vision encoder."""
    transform = transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transform(image).unsqueeze(0)


def main():
    # Device setup
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Load model and tokenizer
    print("Loading model and tokenizer...")
    try:
        model = AutoModel.from_pretrained(
            "AoiNoGeso/japanese-clip-stair", trust_remote_code=True
        ).to(device)
        tokenizer = AutoTokenizer.from_pretrained("AoiNoGeso/japanese-clip-stair")
        print("✓ Model loaded successfully")
    except Exception as e:
        print(f"✗ Failed to load model: {e}")
        return

    # Fetch and preprocess the image
    print("Loading image...")
    try:
        image_url = "https://images.pexels.com/photos/2253275/pexels-photo-2253275.jpeg"
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        image = Image.open(io.BytesIO(response.content))
        pixel_values = preprocess_image(image).to(device)
        print("✓ Image loaded successfully")
    except Exception as e:
        print(f"✗ Failed to load image: {e}")
        return

    # Candidate texts (dog, cat, elephant, bird, fish, flower, car, building)
    texts = ["犬", "猫", "象", "鳥", "魚", "花", "車", "建物"]
    text_inputs = tokenizer(texts, padding=True, return_tensors="pt")
    text_inputs = {k: v.to(device) for k, v in text_inputs.items()}

    # Run inference
    print("Running inference...")
    try:
        with torch.no_grad():
            outputs = model(
                pixel_values=pixel_values,
                input_ids=text_inputs['input_ids'],
                attention_mask=text_inputs['attention_mask'],
            )
            probs = outputs['logits_per_image'].softmax(dim=-1)
        print("✓ Inference completed successfully")
    except Exception as e:
        print(f"✗ Inference failed: {e}")
        return

    # Display results, sorted by probability (highest first)
    print("\n" + "=" * 50)
    print("RESULTS")
    print("=" * 50)
    probs_cpu = probs.cpu().numpy()[0]
    sorted_indices = probs_cpu.argsort()[::-1]
    for i, idx in enumerate(sorted_indices):
        text = texts[idx]
        prob = probs_cpu[idx]
        print(f"{i+1:2d}. {text:6s}: {prob:.4f} ({prob*100:.2f}%)")


if __name__ == "__main__":
    main()