Upload 13 files
- app.py +728 -0
- codegen_torch.py +187 -0
- gpt2_pytorch.py +210 -0
- image_to_3d_openlrm.py +31 -0
- imagegen_vae_unet.py +164 -0
- lipsync_wav2lip.py +57 -0
- musicgen_torch.py +36 -0
- sentiment_roberta.py +195 -0
- stt_wav2vec2.py +46 -0
- summarization_bart.py +34 -0
- text_to_video_clip4clip.py +34 -0
- translation_mbart.py +267 -0
- tts_vits.py +57 -0
app.py
ADDED
@@ -0,0 +1,728 @@
import os
import sys
import torch
import random
import re
import json
import math
import copy
import requests
from functools import lru_cache
from tqdm import tqdm
from torch.nn.parameter import Parameter
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
import time
import threading
import queue
import httpx
import asyncio
import torch.nn as nn
import torch.nn.functional as F
import uuid
import wget
from duckduckgo_search import DDGS
import warnings
from datetime import datetime
import unicodedata
import nltk
import torchaudio
import logging
from PIL import Image
from io import BytesIO
import sentencepiece as spm
from flask import Flask, request, jsonify, send_file, Response
from flask_cors import CORS

nltk.download('punkt', quiet=True)

GPT2_FOLDER = "./GPT2"
MODEL_FILE = "gpt2-pytorch_model.bin"
ENCODER_FILE = "encoder.json"
VOCAB_FILE = "vocab.bpe"
MODEL_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin"
ENCODER_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/GPT2/encoder.json"
VOCAB_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/GPT2/vocab.bpe"
GPT2_FILES_URLS = [
    (MODEL_URL, MODEL_FILE),
    (ENCODER_URL, ENCODER_FILE),
    (VOCAB_URL, VOCAB_FILE),
]

TEXT_GENERATION_RATE = 40000
MAX_LENGTH = 1024
MAX_XDD = 5
END_OF_TEXT_TOKEN = "<|endoftext|>"

html_code = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AI Text Generation</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/4.1.1/animate.min.css"/>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" integrity="sha512-9usAa10IRO0HhonpyAIVpjrylPvoDwiPUiKdWk5t3PyolY1cOd4DSE0Ga+ri4AuTroPR5aQvXU9xC6qOPnzFeg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background: #f0f0f0;
            color: #333;
            margin: 0;
            padding: 0;
            display: flex;
            flex-direction: column;
            align-items: center;
            min-height: 100vh;
        }
        .container {
            width: 95%;
            max-width: 900px;
            padding: 20px;
            background-color: #fff;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            border-radius: 8px;
            margin-top: 20px;
            margin-bottom: 20px;
            display: flex;
            flex-direction: column;
        }
        .header {
            text-align: center;
            margin-bottom: 20px;
        }
        .header h1 {
            font-size: 2em;
            color: #333;
        }
        .form-group {
            margin-bottom: 15px;
        }
        .form-group textarea {
            width: 100%;
            padding: 10px;
            border: 1px solid #ccc;
            border-radius: 5px;
            font-size: 16px;
            box-sizing: border-box;
            resize: vertical;
        }
        button {
            padding: 10px 15px;
            border: none;
            border-radius: 5px;
            background-color: #007bff;
            color: white;
            font-size: 18px;
            cursor: pointer;
            transition: background-color 0.3s ease;
        }
        button:hover {
            background-color: #0056b3;
        }
        #output {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 5px;
            background-color: #f9f9f9;
            white-space: pre-wrap;
            word-break: break-word;
            overflow-y: auto;
            max-height: 100vh;
        }
        #output strong {
            font-weight: bold;
        }
        .animated-text {
            position: fixed;
            top: 20px;
            left: 20px;
            font-size: 1.5em;
            color: rgba(0, 0, 0, 0.1);
            pointer-events: none;
            z-index: -1;
        }
        @media (max-width: 768px) {
            .container {
                width: 98%;
                margin-top: 10px;
                margin-bottom: 10px;
                padding: 15px;
            }
            .header h1 {
                font-size: 1.8em;
            }
            .form-group textarea, .form-group input[type="text"] {
                font-size: 14px;
                padding: 8px;
            }
            button {
                font-size: 16px;
                padding: 8px 12px;
            }
            #output {
                font-size: 14px;
                padding: 10px;
                margin-top: 15px;
            }
        }
    </style>
</head>
<body>
    <div class="animated-text animate__animated animate__fadeIn animate__infinite infinite">AI POWERED</div>
    <div class="container">
        <div class="header animate__animated animate__fadeInDown">
        </div>
        <div class="form-group animate__animated animate__fadeInLeft">
            <textarea id="text" rows="5" placeholder="Enter text"></textarea>
        </div>
        <button onclick="generateText()" class="animate__animated animate__fadeInUp">Generate Reasoning</button>
        <div id="output" class="animate__animated">
            <strong>Response:</strong><br>
            <div id="generatedText"></div>
        </div>
    </div>
    <script>
        let eventSource = null;
        let accumulatedText = "";
        let lastResponse = "";
        let currentSpan = null;
        let messageCounter = 0;

        async function generateText() {
            const inputText = document.getElementById("text").value;
            let generatedTextDiv = document.getElementById("generatedText");
            generatedTextDiv.innerHTML = "";
            accumulatedText = "";
            lastResponse = "";
            currentSpan = null;
            messageCounter = 0;

            if (eventSource) {
                eventSource.close();
            }
            const temp = 0.7;
            const top_k_val = 40;
            const top_p_val = 0.0;
            const repetition_penalty_val = 1.2;
            eventSource = new EventSource(`/generate_stream?text=${encodeURIComponent(inputText)}&temp=${temp}&top_k=${top_k_val}&top_p=${top_p_val}&reppenalty=${repetition_penalty_val}`);
            eventSource.onmessage = function(event) {
                if (event.data === "<END_STREAM>") {
                    eventSource.close();
                    const currentResponse = accumulatedText.replace("<|endoftext|>", "").replace(/\\s+(?=[.,,。])/g, '').trim();
                    if (currentResponse === lastResponse.trim()) {
                        accumulatedText = "**Response is repetitive. Please try again or rephrase your query.**";
                    } else {
                        lastResponse = currentResponse;
                    }
                    document.getElementById("generatedText").innerHTML = marked.parse(accumulatedText);
                    return;
                }
                try {
                    const jsonData = JSON.parse(event.data);
                    const token = jsonData.token;
                    if (token === "<|endoftext|>" || token === "<END_STREAM>") {
                        return;
                    }
                    if (token === "<NEW_MESSAGE>") {
                        messageCounter++;
                        if (messageCounter > 1) {
                            generatedTextDiv.innerHTML += "<br><br><hr style='border-top: 1px dashed #8c8b8b; margin-top: 10px; margin-bottom: 10px;'><strong>Continued Response:</strong><br><div id='generatedText_" + messageCounter + "'></div>";
                            generatedTextDiv = document.getElementById("generatedText_" + messageCounter);
                            accumulatedText = "";
                        }
                        return;
                    }
                    accumulatedText += token + " ";
                } catch (e) {
                    console.error("Error parsing SSE data:", event.data, e);
                }
            };
            eventSource.onerror = function(error) {
                console.error("SSE error", error);
                eventSource.close();
            };
            const outputDiv = document.getElementById("output");
            outputDiv.classList.add("show");
        }
    </script>
</body>
</html>
"""

TRANSLATION_FOLDER = "./TranslationModel"
TRANSLATION_MODEL_WEIGHTS_FILE = "pytorch_model.bin"
TRANSLATION_MODEL_CONFIG_FILE = "config.json"
TRANSLATION_MODEL_VOCAB_FILE = "sentencepiece.bpe.model"
TRANSLATION_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/pytorch_model.bin"
TRANSLATION_MODEL_CONFIG_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/config.json"
TRANSLATION_MODEL_VOCAB_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/sentencepiece.bpe.model"
TRANSLATION_MODEL_FILES_URLS = [
    (TRANSLATION_MODEL_WEIGHTS_URL, TRANSLATION_MODEL_WEIGHTS_FILE),
    (TRANSLATION_MODEL_CONFIG_URL, TRANSLATION_MODEL_CONFIG_FILE),
    (TRANSLATION_MODEL_VOCAB_URL, TRANSLATION_MODEL_VOCAB_FILE),
]

CODEGEN_FOLDER = "./CodeGenModel"
CODEGEN_MODEL_NAME = "codegen-350M-multi"
CODEGEN_MODEL_WEIGHTS = "pytorch_model.bin"
CODEGEN_CONFIG = "config.json"
CODEGEN_VOCAB = "vocab.json"
CODEGEN_MERGES = "merges.txt"
CODEGEN_MODEL_WEIGHTS_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/pytorch_model.bin"
CODEGEN_CONFIG_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/config.json"
CODEGEN_VOCAB_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/vocab.json"
CODEGEN_MERGES_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/merges.txt"
CODEGEN_FILES_URLS = [
    (CODEGEN_MODEL_WEIGHTS_URL, CODEGEN_MODEL_WEIGHTS),
    (CODEGEN_CONFIG_URL, CODEGEN_CONFIG),
    (CODEGEN_VOCAB_URL, CODEGEN_VOCAB),
    (CODEGEN_MERGES_URL, CODEGEN_MERGES),
]

TTS_FOLDER = "./TTSModel"
TTS_MODEL_NAME = "vits"
TTS_MODEL_CONFIG = "config.json"
TTS_MODEL_WEIGHTS = "pytorch_model.bin"
TTS_VOCAB = "vocab.json"
TTS_CONFIG_URL = "https://huggingface.co/kakao-enterprise/vits-vctk/resolve/main/config.json"
TTS_MODEL_WEIGHTS_URL = "https://huggingface.co/kakao-enterprise/vits-vctk/resolve/main/pytorch_model.bin"
TTS_VOCAB_URL = "https://huggingface.co/kakao-enterprise/vits-vctk/resolve/main/vocab.json"
TTS_FILES_URLS = [
    (TTS_CONFIG_URL, TTS_MODEL_CONFIG),
    (TTS_MODEL_WEIGHTS_URL, TTS_MODEL_WEIGHTS),
    (TTS_VOCAB_URL, TTS_VOCAB),
]

STT_FOLDER = "./STTModel"
STT_MODEL_NAME = "wav2vec2"
STT_MODEL_WEIGHTS = "pytorch_model.bin"
STT_CONFIG = "config.json"
STT_VOCAB = "vocab.json"
STT_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/pytorch_model.bin"
STT_CONFIG_URL = "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json"
STT_VOCAB_URL = "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/vocab.json"
STT_FILES_URLS = [
    (STT_MODEL_WEIGHTS_URL, STT_MODEL_WEIGHTS),
    (STT_CONFIG_URL, STT_CONFIG),
    (STT_VOCAB_URL, STT_VOCAB),
]

SENTIMENT_FOLDER = "./SentimentModel"
SENTIMENT_MODEL_WEIGHTS = "pytorch_model.bin"
SENTIMENT_VOCAB = "sentiment_vocab.json"
SENTIMENT_CONFIG = "config.json"
SENTIMENT_MODEL_WEIGHTS_URL = "https://huggingface.co/cardiffnlp/distilroberta-base-sentiment/resolve/main/pytorch_model.bin"
SENTIMENT_VOCAB_URL = "https://huggingface.co/cardiffnlp/distilroberta-base-sentiment/resolve/main/vocab.json"
SENTIMENT_CONFIG_URL = "https://huggingface.co/cardiffnlp/distilroberta-base-sentiment/resolve/main/config.json"
SENTIMENT_FILES_URLS = [
    (SENTIMENT_MODEL_WEIGHTS_URL, SENTIMENT_MODEL_WEIGHTS),
    (SENTIMENT_VOCAB_URL, SENTIMENT_VOCAB),
    (SENTIMENT_CONFIG_URL, SENTIMENT_CONFIG),
]

IMAGEGEN_FOLDER = "./ImageGenModel"
IMAGEGEN_MODEL_WEIGHTS = "diffusion_pytorch_model.bin"
IMAGEGEN_CONFIG = "config.json"
IMAGEGEN_MODEL_WEIGHTS_URL = "https://huggingface.co/stabilityai/sd-vae-ft-mse/resolve/main/diffusion_pytorch_model.bin"
IMAGEGEN_CONFIG_URL = "https://huggingface.co/stabilityai/sd-vae-ft-mse/resolve/main/config.json"
IMAGEGEN_FILES_URLS = [
    (IMAGEGEN_MODEL_WEIGHTS_URL, IMAGEGEN_MODEL_WEIGHTS),
    (IMAGEGEN_CONFIG_URL, IMAGEGEN_CONFIG),
]

LIPSYNC_FOLDER = "./LipSyncModel"
LIPSYNC_MODEL_WEIGHTS = "lipsync_expert.pth"
LIPSYNC_MODEL_WEIGHTS_URL = "https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?SourceUrl=%2Fpersonal%2Fradrabha%5Fm%5Fresearch%5Fiiit%5Fac%5Fin%2FDocuments%2FWav2Lip%5FModels%2Flipsync%5Fexpert%2Epth"
LIPSYNC_FILES_URLS = [
    (LIPSYNC_MODEL_WEIGHTS_URL, LIPSYNC_MODEL_WEIGHTS),
]

WAV2LIP_FOLDER = "./Wav2LipModel"
WAV2LIP_MODEL_WEIGHTS = "wav2lip_gan.pth"
WAV2LIP_MODEL_WEIGHTS_URL = "https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?SourceUrl=%2Fpersonal%2Fradrabha%5Fm%5Fresearch%5Fiiit%5Fac%5Fin%2FDocuments%2FWav2Lip%5FModels%2Fwav2lip%5Fgan%2Epth"
WAV2LIP_FILES_URLS = [
    (WAV2LIP_MODEL_WEIGHTS_URL, WAV2LIP_MODEL_WEIGHTS),
]

MUSICGEN_FOLDER = "./MusicGenModel"
MUSICGEN_MODEL_NAME = "melody"
MUSICGEN_MODEL_WEIGHTS = "pytorch_model.bin"
MUSICGEN_CONFIG = "config.json"
MUSICGEN_SAMPLE_RATE = 32000
MUSICGEN_DURATION = 8
MUSICGEN_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/musicgen-small/resolve/main/pytorch_model.bin"
MUSICGEN_CONFIG_URL = "https://huggingface.co/facebook/musicgen-small/resolve/main/config.json"
MUSICGEN_FILES_URLS = [
    (MUSICGEN_MODEL_WEIGHTS_URL, MUSICGEN_MODEL_WEIGHTS),
    (MUSICGEN_CONFIG_URL, MUSICGEN_CONFIG),
]

CODEGEN_SPM_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/spm.model"
CODEGEN_SPM = "spm.model"

TRANSLATION_SPM_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/sentencepiece.bpe.model"
TRANSLATION_SPM = "sentencepiece.bpe.model"

TEXT_TO_VIDEO_FOLDER = "./TextToVideoModel"
TEXT_TO_VIDEO_MODEL_WEIGHTS = "pytorch_model.bin"
TEXT_TO_VIDEO_CONFIG = "config.json"
TEXT_TO_VIDEO_VOCAB = "vocab.json"
TEXT_TO_VIDEO_MODEL_WEIGHTS_URL = "https://huggingface.co/Searchium-ai/clip4clip-webvid150k/resolve/main/pytorch_model.bin"
TEXT_TO_VIDEO_CONFIG_URL = "https://huggingface.co/Searchium-ai/clip4clip-webvid150k/resolve/main/config.json"
TEXT_TO_VIDEO_VOCAB_URL = "https://huggingface.co/Searchium-ai/clip4clip-webvid150k/resolve/main/vocab.json"
TEXT_TO_VIDEO_FILES_URLS = [
    (TEXT_TO_VIDEO_MODEL_WEIGHTS_URL, TEXT_TO_VIDEO_MODEL_WEIGHTS),
    (TEXT_TO_VIDEO_CONFIG_URL, TEXT_TO_VIDEO_CONFIG),
    (TEXT_TO_VIDEO_VOCAB_URL, TEXT_TO_VIDEO_VOCAB),
]

SUMMARIZATION_FOLDER = "./SummarizationModel"
SUMMARIZATION_MODEL_WEIGHTS = "pytorch_model.bin"
SUMMARIZATION_CONFIG = "config.json"
SUMMARIZATION_VOCAB = "vocab.json"
SUMMARIZATION_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/bart-large-cnn/resolve/main/pytorch_model.bin"
SUMMARIZATION_CONFIG_URL = "https://huggingface.co/facebook/bart-large-cnn/resolve/main/config.json"
SUMMARIZATION_VOCAB_URL = "https://huggingface.co/facebook/bart-large-cnn/resolve/main/vocab.json"
SUMMARIZATION_FILES_URLS = [
    (SUMMARIZATION_MODEL_WEIGHTS_URL, SUMMARIZATION_MODEL_WEIGHTS),
    (SUMMARIZATION_CONFIG_URL, SUMMARIZATION_CONFIG),
    (SUMMARIZATION_VOCAB_URL, SUMMARIZATION_VOCAB),
]

IMAGE_TO_3D_FOLDER = "./ImageTo3DModel"
IMAGE_TO_3D_MODEL_WEIGHTS = "pytorch_model.bin"
IMAGE_TO_3D_CONFIG = "config.json"
IMAGE_TO_3D_MODEL_URL = "https://huggingface.co/zxhezexin/openlrm-obj-base-1.1/resolve/main/pytorch_model.bin"
IMAGE_TO_3D_CONFIG_URL = "https://huggingface.co/zxhezexin/openlrm-obj-base-1.1/resolve/main/config.json"
IMAGE_TO_3D_FILES_URLS = [
    (IMAGE_TO_3D_MODEL_URL, IMAGE_TO_3D_MODEL_WEIGHTS),
    (IMAGE_TO_3D_CONFIG_URL, IMAGE_TO_3D_CONFIG),
]


state_dict = None
enc = None
config = None
model = None
device = torch.device("cpu")
news_clf = None
tfidf_vectorizer = None
text_queue = queue.Queue()
categories = None
is_training = False
background_threads = []
feedback_queue = queue.Queue()
reasoning_queue = queue.Queue()
seen_responses = set()
tts_model = None
stt_model = None
sentiment_model = None
imagegen_model = None
lipsync_model = None
wav2lip_model = None
musicgen_model = None
translation_model = None
codegen_model = None
text_to_video_model = None
summarization_model = None
image_to_3d_model = None
tts_pipeline = False
stt_pipeline = False
sentiment_pipeline = False
imagegen_pipeline = False
translation_pipeline = False
codegen_pipeline = False
text_to_video_pipeline = False
summarization_pipeline = False
image_to_3d_pipeline = False
stt_tokenizer = None
stt_processor = None
sentiment_tokenizer = None
sentiment_model_instance = None
imagegen_vae = None
imagegen_unet = None
imagegen_scheduler = None
musicgen_model_instance = None
musicgen_tokenizer = None
musicgen_processor = None
translation_model_instance = None
translation_tokenizer = None
codegen_model_instance = None
codegen_tokenizer = None
codegen_sp = None
translation_sp = None
text_to_video_tokenizer = None
text_to_video_model_instance = None
summarization_tokenizer = None
summarization_model_instance = None
image_to_3d_config = None
image_to_3d_model_instance = None
app = Flask(__name__)
CORS(app)

from gpt2_pytorch import *
from tts_vits import *
from stt_wav2vec2 import *
from sentiment_roberta import *
from imagegen_vae_unet import *
from musicgen_torch import *
from translation_mbart import *
from codegen_torch import *
from text_to_video_clip4clip import *
from summarization_bart import *
from image_to_3d_openlrm import *

def download_file(url, filename):
    os.makedirs(os.path.dirname(filename), exist_ok=True)  # Ensure directory exists
    if not os.path.exists(filename):
        print(f"Downloading {filename} from {url}...")
        try:
            wget.download(url, out=filename)  # Specify output filename directly
            print(f"Downloaded {filename} successfully.")
        except Exception as e:
            print(f"Error downloading {filename}: {e}")

def ensure_folder_and_files_exist(folder_path, files_urls):
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"Folder '{folder_path}' created.")

    for url, filename in files_urls:
        filepath = os.path.join(folder_path, filename)
        download_file(url, filepath)

def ensure_single_file_exists(folder_path, file_url, filename):
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f"Folder '{folder_path}' created.")
    filepath = os.path.join(folder_path, filename)
    download_file(file_url, filepath)


def ensure_gpt2_files_exist():
    ensure_folder_and_files_exist(GPT2_FOLDER, GPT2_FILES_URLS)

def ensure_translation_files_exist():
    ensure_folder_and_files_exist(TRANSLATION_FOLDER, TRANSLATION_MODEL_FILES_URLS)
    ensure_single_file_exists(TRANSLATION_FOLDER, TRANSLATION_SPM_URL, TRANSLATION_SPM)

def ensure_codegen_files_exist():
    ensure_folder_and_files_exist(CODEGEN_FOLDER, CODEGEN_FILES_URLS)
    ensure_single_file_exists(CODEGEN_FOLDER, CODEGEN_SPM_URL, CODEGEN_SPM)

def ensure_tts_files_exist():
    ensure_folder_and_files_exist(TTS_FOLDER, TTS_FILES_URLS)

def ensure_stt_files_exist():
    ensure_folder_and_files_exist(STT_FOLDER, STT_FILES_URLS)

def ensure_sentiment_files_exist():
    ensure_folder_and_files_exist(SENTIMENT_FOLDER, SENTIMENT_FILES_URLS)

def ensure_imagegen_files_exist():
    ensure_folder_and_files_exist(IMAGEGEN_FOLDER, IMAGEGEN_FILES_URLS)

def ensure_lipsync_files_exist():
    ensure_folder_and_files_exist(LIPSYNC_FOLDER, LIPSYNC_FILES_URLS)

def ensure_wav2lip_files_exist():
    ensure_folder_and_files_exist(WAV2LIP_FOLDER, WAV2LIP_FILES_URLS)

def ensure_musicgen_files_exist():
    ensure_folder_and_files_exist(MUSICGEN_FOLDER, MUSICGEN_FILES_URLS)

def ensure_text_to_video_files_exist():
    ensure_folder_and_files_exist(TEXT_TO_VIDEO_FOLDER, TEXT_TO_VIDEO_FILES_URLS)

def ensure_summarization_files_exist():
    ensure_folder_and_files_exist(SUMMARIZATION_FOLDER, SUMMARIZATION_FILES_URLS)

def ensure_image_to_3d_files_exist():
    ensure_folder_and_files_exist(IMAGE_TO_3D_FOLDER, IMAGE_TO_3D_FILES_URLS)

def ensure_all_model_files_exist():  # Define the function here, before it's called
    ensure_gpt2_files_exist()
    ensure_translation_files_exist()
    ensure_codegen_files_exist()
    ensure_tts_files_exist()
    ensure_stt_files_exist()
    ensure_sentiment_files_exist()
    ensure_imagegen_files_exist()
    ensure_lipsync_files_exist()
    ensure_wav2lip_files_exist()
    ensure_musicgen_files_exist()
    ensure_text_to_video_files_exist()
    ensure_summarization_files_exist()
    ensure_image_to_3d_files_exist()


@app.route("/", methods=['GET'])
async def html_handler():
    return html_code

@app.route("/generate_stream", methods=['GET'])
async def generate_stream_api():
    text_input = request.args.get("text")
    temperature = float(request.args.get("temp", 0.7))
    top_k = int(request.args.get("top_k", 40))
    top_p = float(request.args.get("top_p", 0.0))
    reppenalty = float(request.args.get("reppenalty", 1.2))
    return Response(generate_stream_generator(text_input, temperature, top_k, top_p, reppenalty), mimetype='text/event-stream')

@app.route("/tts", methods=['POST'])
def tts_api():
    data = request.get_json()
    text = data.get('text')
    if not text:
        return jsonify({"error": "Text is required"}), 400
    output_file = text_to_speech(text)
    if output_file == "Error generating speech.":
        return jsonify({"error": "TTS generation failed"}), 500
    return send_file(output_file, mimetype="audio/wav", as_attachment=True, download_name="output.wav")

@app.route("/stt", methods=['POST'])
def stt_api():
    if 'audio' not in request.files:
        return jsonify({"error": "Audio file is required"}), 400
    audio_file = request.files['audio']
    temp_audio_path = f"temp_audio_{uuid.uuid4()}.wav"
    audio_file.save(temp_audio_path)
    output_file = speech_to_text(temp_audio_path)
    os.remove(temp_audio_path)
    if output_file == "Error transcribing audio.":
        return jsonify({"error": "STT failed"}), 500
    return send_file(output_file, mimetype="text/plain", as_attachment=True, download_name="output.txt")

@app.route("/sentiment", methods=['POST'])
def sentiment_api():
    data = request.get_json()
    text = data.get('text')
    if not text:
        return jsonify({"error": "Text is required"}), 400
    output_file = analyze_sentiment(text)
    if output_file == "Sentiment model not initialized.":
        return jsonify({"error": "Sentiment analysis failed"}), 500
    return jsonify(output_file)

@app.route("/imagegen", methods=['POST'])
def imagegen_api():
    data = request.get_json()
    prompt = data.get('prompt')
    if not prompt:
        return jsonify({"error": "Prompt is required"}), 400
    output_file = generate_image(prompt)
    if output_file == "Error generating image.":
        return jsonify({"error": "Image generation failed"}), 500
    image_io = BytesIO()
    output_file.save(image_io, 'PNG')
    image_io.seek(0)
    return send_file(image_io, mimetype='image/png', as_attachment=True, download_name="output.png")

@app.route("/musicgen", methods=['POST'])
def musicgen_api():
    data = request.get_json()
    prompt = data.get('prompt')
    if not prompt:
        return jsonify({"error": "Prompt is required"}), 400
    output_file = generate_music(prompt)
    if output_file == "Error generating music.":
        return jsonify({"error": "Music generation failed"}), 500
    return send_file(output_file, mimetype="audio/wav", as_attachment=True, download_name="output.wav")

@app.route("/translation", methods=['POST'])
def translation_api():
    data = request.get_json()
    text = data.get('text')
    target_lang = data.get('target_lang', 'es')
    source_lang = data.get('source_lang', 'en')
    if not text:
        return jsonify({"error": "Text is required"}), 400
    output_file = perform_translation(text, target_language_code=f'{target_lang}_XX', source_language_code=f'{source_lang}_XX')
    if output_file == "Error during translation.":
        return jsonify({"error": "Translation failed"}), 500
    return send_file(output_file, mimetype="text/plain", as_attachment=True, download_name="output_translation.txt")

@app.route("/codegen", methods=['POST'])
def codegen_api():
    data = request.get_json()
    prompt = data.get('prompt')
    if not prompt:
        return jsonify({"error": "Prompt is required"}), 400
    output_file = generate_code(prompt)
    if output_file == "Error generating code.":
        return jsonify({"error": "Code generation failed"}), 500
    return send_file(output_file, mimetype="text/x-python", as_attachment=True, download_name="output.py")

@app.route("/text_to_video", methods=['POST'])
def text_to_video_api():
    data = request.get_json()
    prompt = data.get('prompt')
    if not prompt:
        return jsonify({"error": "Prompt is required"}), 400
    output_file = text_to_video(prompt)
    if output_file == "Error generating video representation.":
        return jsonify({"error": "Text to video failed"}), 500
    return send_file(output_file, mimetype="application/octet-stream", as_attachment=True, download_name="output_video_representation.pt")

@app.route("/summarization", methods=['POST'])
def summarization_api():
    data = request.get_json()
    text = data.get('text')
    if not text:
        return jsonify({"error": "Text is required"}), 400
    output_file = summarize_text(text)
    if output_file == "Error during summarization.":
        return jsonify({"error": "Summarization failed"}), 500
    return send_file(output_file, mimetype="text/plain", as_attachment=True, download_name="output_summary.txt")

@app.route("/image_to_3d", methods=['POST'])
def image_to_3d_api():
    if 'image' not in request.files:
        return jsonify({"error": "Image file is required"}), 400
    image_file = request.files['image']
    temp_image_path = f"temp_image_{uuid.uuid4()}.png"
    image_file.save(temp_image_path)
    output_file = image_to_3d(temp_image_path)
    os.remove(temp_image_path)
    if output_file == "Error converting image to 3D.":
        return jsonify({"error": "Image to 3D failed"}), 500
    return send_file(output_file, mimetype="model/obj", as_attachment=True, download_name="output_3d.obj")


async def main():
    global background_threads, response_queue
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    response_queue = queue.Queue()

    ensure_all_model_files_exist()
    initialize_model()
    await initialize_sklearn()
    initialize_tts_model()
    initialize_stt_model()
    initialize_sentiment_model()
    initialize_imagegen_model()
    ensure_lipsync_files_exist()
    ensure_wav2lip_files_exist()
    initialize_musicgen_model()
    initialize_translation_model()
    initialize_codegen_model()
    initialize_text_to_video_model()
    initialize_summarization_model()
    initialize_image_to_3d_model()

    background_threads.append(threading.Thread(target=generate_and_queue_text, args=('en',), daemon=True))
    background_threads.append(threading.Thread(target=generate_and_queue_text, args=('es',), daemon=True))
    background_threads.append(threading.Thread(target=background_training, daemon=True))
    for thread in background_threads:
        thread.start()

    asyncio.create_task(background_reasoning_queue())

    app.run(host="127.0.0.1", port=7860, debug=False)

if __name__ == '__main__':
    asyncio.run(main())
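The routes above return either JSON or file attachments. The following is an illustrative client sketch (not part of the uploaded files), assuming the server is running locally on 127.0.0.1:7860 as configured in main(); the output filenames are arbitrary.

import requests

BASE_URL = "http://127.0.0.1:7860"  # host/port from app.run() in main(); adjust if deployed elsewhere

# /tts expects JSON {"text": ...} and returns a WAV attachment on success.
resp = requests.post(f"{BASE_URL}/tts", json={"text": "Hello from the demo server."})
if resp.ok:
    with open("tts_output.wav", "wb") as f:
        f.write(resp.content)

# /translation wraps the language codes as "<lang>_XX" before calling perform_translation.
resp = requests.post(
    f"{BASE_URL}/translation",
    json={"text": "Good morning", "source_lang": "en", "target_lang": "es"},
)
if resp.ok:
    with open("output_translation.txt", "wb") as f:
        f.write(resp.content)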
codegen_torch.py
ADDED
@@ -0,0 +1,187 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import wget
import json
import os
import sentencepiece as spm
import re

CODEGEN_FOLDER = "./CodeGenModel"
CODEGEN_MODEL_NAME = "codegen-350M-multi"
CODEGEN_MODEL_WEIGHTS = "pytorch_model.bin"
CODEGEN_CONFIG = "config.json"
CODEGEN_VOCAB = "vocab.json"
CODEGEN_MERGES = "merges.txt"
CODEGEN_MODEL_WEIGHTS_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/pytorch_model.bin"
CODEGEN_CONFIG_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/config.json"
CODEGEN_VOCAB_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/vocab.json"
CODEGEN_MERGES_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/merges.txt"
CODEGEN_FILES_URLS = [
    (CODEGEN_MODEL_WEIGHTS_URL, CODEGEN_MODEL_WEIGHTS),
    (CODEGEN_CONFIG_URL, CODEGEN_CONFIG),
    (CODEGEN_VOCAB_URL, CODEGEN_VOCAB),
    (CODEGEN_MERGES_URL, CODEGEN_MERGES),
]
CODEGEN_SPM_URL = "https://huggingface.co/Salesforce/codegen-350M-multi/resolve/main/spm.model"
CODEGEN_SPM = "spm.model"

def ensure_codegen_files_exist():
    os.makedirs(CODEGEN_FOLDER, exist_ok=True)
    for url, filename in CODEGEN_FILES_URLS:
        filepath = os.path.join(CODEGEN_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)
    filepath_spm = os.path.join(CODEGEN_FOLDER, CODEGEN_SPM)
    if not os.path.exists(filepath_spm):
        wget.download(CODEGEN_SPM_URL, out=filepath_spm)

class CodeGenConfig:
    def __init__(self, vocab_size, n_positions=2048, n_ctx=2048, n_embd=1024, n_layer=24, n_head=16, n_inner=None, activation_function="gelu_new", resid_pdrop=0.1, embd_pdrop=0.1, attn_pdrop=0.1, layer_norm_epsilon=1e-05, initializer_range=0.02, scale_attn_weights=True, use_cache=True, bos_token_id=50256, eos_token_id=50256, **kwargs):
        self.vocab_size = vocab_size
        self.n_positions = n_positions
        self.n_ctx = n_ctx
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_inner = n_inner
        self.activation_function = activation_function
        self.resid_pdrop = resid_pdrop
        self.embd_pdrop = embd_pdrop
        self.attn_pdrop = attn_pdrop
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.scale_attn_weights = scale_attn_weights
        self.use_cache = use_cache
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

class CodeGenForCausalLM(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.transformer = CodeGenModel(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

    def forward(self, input_ids, attention_mask=None):
        transformer_outputs = self.transformer(input_ids, attention_mask=attention_mask)
        logits = self.lm_head(transformer_outputs)
        return logits

class CodeGenModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.wte = nn.Embedding(config.vocab_size, config.n_embd)
        self.wpe = nn.Embedding(config.n_positions, config.n_embd)
        self.drop = nn.Dropout(config.embd_pdrop)
        self.h = nn.ModuleList([CodeGenBlock(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)

    def forward(self, input_ids, attention_mask=None):
        input_shape = input_ids.size()
        input_ids = input_ids.view(-1, input_ids.size(-1))
        position_ids = torch.arange(0, input_shape[-1], dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
        inputs_embeds = self.wte(input_ids)
        position_embeds = self.wpe(position_ids)
        hidden_states = inputs_embeds + position_embeds
        hidden_states = self.drop(hidden_states)
        output_shape = input_shape + (hidden_states.size(-1),)
        for block in self.h:
            hidden_states = block(hidden_states, attention_mask=attention_mask)
        hidden_states = self.ln_f(hidden_states)
        return hidden_states.view(*output_shape)

class CodeGenBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.ln_1 = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
        self.attn = CodeGenAttention(config)
        self.ln_2 = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
        self.mlp = CodeGenMLP(config)

    def forward(self, hidden_states, attention_mask=None):
        residual = hidden_states
        hidden_states = self.ln_1(hidden_states)
        attn_outputs = self.attn(hidden_states, attention_mask=attention_mask)
        hidden_states = residual + attn_outputs
        residual = hidden_states
        hidden_states = self.ln_2(hidden_states)
        feedforward_hidden_states = self.mlp(hidden_states)
        hidden_states = residual + feedforward_hidden_states
        return hidden_states

class CodeGenMLP(nn.Module):
    def __init__(self, config):
        super().__init__()
        # n_inner defaults to None in CodeGenConfig; fall back to the conventional 4 * n_embd.
        inner_dim = config.n_inner if config.n_inner is not None else 4 * config.n_embd
        self.c_fc = nn.Linear(config.n_embd, inner_dim)
        self.c_proj = nn.Linear(inner_dim, config.n_embd)
        self.dropout = nn.Dropout(config.resid_pdrop)

    def forward(self, hidden_states):
        hidden_states = self.c_fc(hidden_states)
        hidden_states = F.gelu(hidden_states)
        hidden_states = self.c_proj(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states

class CodeGenAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop)
        self.n_head = config.n_head
        self.embed_dim = config.n_embd
        self.split_size = self.embed_dim
        self.c_attn = nn.Linear(self.embed_dim, 3 * self.embed_dim)
        self.c_proj = nn.Linear(self.embed_dim, self.embed_dim)
        self.scale_attn_weights = config.scale_attn_weights
        self.use_cache = config.use_cache
        self.register_buffer("bias", torch.tril(torch.ones((config.n_ctx, config.n_ctx), dtype=torch.uint8)).view((1, 1, config.n_ctx, config.n_ctx)))

    def _attn(self, query, key, value, attention_mask=None, head_mask=None):
        attn_weights = torch.matmul(query, key.transpose(-1, -2))
        if self.scale_attn_weights:
            attn_weights = attn_weights / math.sqrt(value.size(-1))

        mask = self.bias[:, :, :attn_weights.size(-2), :attn_weights.size(-1)]
        attn_weights = torch.where(mask.bool(), attn_weights, torch.tensor(-1e4, device=attn_weights.device))

        if attention_mask is not None:
            attn_weights = attn_weights + attention_mask

        attn_weights = nn.Softmax(dim=-1)(attn_weights)
        attn_weights = self.attn_dropout(attn_weights)
        attn_output = torch.matmul(attn_weights, value)
        return attn_output

    def _split_heads(self, tensor, num_heads, attn_head_size):
        new_shape = tensor.size()[:-1] + (num_heads, attn_head_size)
        tensor = tensor.view(*new_shape)
        return tensor.permute(0, 2, 1, 3)

    def _merge_heads(self, tensor, num_heads, attn_head_size):
        # Undo _split_heads: (batch, heads, seq, head_dim) -> (batch, seq, heads * head_dim).
        tensor = tensor.permute(0, 2, 1, 3).contiguous()
        new_shape = tensor.size()[:-2] + (num_heads * attn_head_size,)
        return tensor.view(*new_shape)

    def forward(self, hidden_states, attention_mask=None, head_mask=None, past_key_value=None, use_cache=False):
        query, key, value = self.c_attn(hidden_states).split(self.split_size, dim=2)
        query = self._split_heads(query, self.n_head, self.embed_dim // self.n_head)
        key = self._split_heads(key, self.n_head, self.embed_dim // self.n_head)
        value = self._split_heads(value, self.n_head, self.embed_dim // self.n_head)
        if past_key_value is not None:
            past_key, past_value = past_key_value
            key = torch.cat((past_key, key), dim=-2)
            value = torch.cat((past_value, value), dim=-2)
        present_key_value = (key, value) if use_cache else None
        attn_output = self._attn(query, key, value, attention_mask, head_mask)
        attn_output = self._merge_heads(attn_output, self.n_head, self.embed_dim // self.n_head)
        attn_output = self.c_proj(attn_output)
        attn_output = self.resid_dropout(attn_output)
        outputs = (attn_output, present_key_value)
        return outputs[0]
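As a quick smoke test of the classes above, the sketch below (not part of the uploaded files) instantiates a deliberately small CodeGenConfig and runs a dummy forward pass. The sizes are made up for illustration, do not match the codegen-350M-multi checkpoint, and no pretrained weights are loaded.

import torch

# Hypothetical toy configuration; sizes chosen only to keep the smoke test light.
cfg = CodeGenConfig(vocab_size=51200, n_positions=128, n_ctx=128, n_embd=256, n_layer=2, n_head=8)
model = CodeGenForCausalLM(cfg)
model.eval()

dummy_ids = torch.randint(0, cfg.vocab_size, (1, 16))  # batch of one, 16 random token ids
with torch.no_grad():
    logits = model(dummy_ids)
print(logits.shape)  # torch.Size([1, 16, 51200])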
gpt2_pytorch.py
ADDED
@@ -0,0 +1,210 @@
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
import wget
|
| 6 |
+
import json
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
|
| 9 |
+
GPT2_FOLDER = "./GPT2"
|
| 10 |
+
MODEL_FILE = "gpt2-pytorch_model.bin"
|
| 11 |
+
ENCODER_FILE = "encoder.json"
|
| 12 |
+
VOCAB_FILE = "vocab.bpe"
|
| 13 |
+
MODEL_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin"
|
| 14 |
+
ENCODER_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/GPT2/encoder.json"
|
| 15 |
+
VOCAB_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/GPT2/vocab.bpe"
|
| 16 |
+
MAX_LENGTH = 1024
|
| 17 |
+
END_OF_TEXT_TOKEN = "<|endoftext|>"
|
| 18 |
+
|
| 19 |
+
def ensure_gpt2_files_exist():
|
| 20 |
+
if not os.path.exists(os.path.join(GPT2_FOLDER, MODEL_FILE)):
|
| 21 |
+
wget.download(MODEL_URL, out=os.path.join(GPT2_FOLDER, MODEL_FILE))
|
| 22 |
+
if not os.path.exists(os.path.join(GPT2_FOLDER, ENCODER_FILE)):
|
| 23 |
+
wget.download(ENCODER_URL, out=os.path.join(GPT2_FOLDER, ENCODER_FILE))
|
| 24 |
+
if not os.path.exists(os.path.join(GPT2_FOLDER, VOCAB_FILE)):
|
| 25 |
+
wget.download(VOCAB_URL, out=os.path.join(GPT2_FOLDER, VOCAB_FILE))
|
| 26 |
+
|
| 27 |
+
class GPT2Config:
|
| 28 |
+
def __init__(self, vocab_size_or_config_json_file=50257, n_positions=MAX_LENGTH, n_ctx=MAX_LENGTH, n_embd=768, n_layer=12, n_head=12, layer_norm_epsilon=1e-5, initializer_range=0.02):
|
| 29 |
+
self.vocab_size = vocab_size_or_config_json_file
|
| 30 |
+
self.n_ctx = n_ctx
|
| 31 |
+
self.n_positions = n_positions
|
| 32 |
+
self.n_embd = n_embd
|
| 33 |
+
self.n_layer = n_layer
|
| 34 |
+
self.n_head = n_head
|
| 35 |
+
self.layer_norm_epsilon = layer_norm_epsilon
|
| 36 |
+
self.initializer_range = initializer_range
|
| 37 |
+
|
| 38 |
+
class GPT2LMHeadModel(nn.Module):
|
| 39 |
+
def __init__(self, config):
|
| 40 |
+
super().__init__()
|
| 41 |
+
self.transformer = GPT2Model(config)
|
| 42 |
+
self.lm_head = GPT2LMHead(self.transformer.wte.weight, config)
|
| 43 |
+
|
| 44 |
+
def forward(self, input_ids, position_ids=None, token_type_ids=None, past=None):
|
| 45 |
+
lm_logits, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
|
| 46 |
+
return lm_logits, presents
|
| 47 |
+
|
| 48 |
+
class GPT2Model(nn.Module):
|
| 49 |
+
def __init__(self, config):
|
| 50 |
+
super().__init__()
|
| 51 |
+
self.n_layer = config.n_layer
|
| 52 |
+
self.n_embd = config.n_embd
|
| 53 |
+
self.n_vocab = config.vocab_size
|
| 54 |
+
self.wte = nn.Embedding(config.vocab_size, config.n_embd)
|
| 55 |
+
self.wpe = nn.Embedding(config.n_positions, config.n_embd)
|
| 56 |
+
block = Block(config.n_ctx, config, scale=True)
|
| 57 |
+
        self.h = nn.ModuleList([copy.deepcopy(block) for _ in range(config.n_layer)])
        self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)

    def forward(self, input_ids, position_ids=None, token_type_ids=None, past=None):
        if past is None:
            past_length = 0
            past = [None] * len(self.h)
        else:
            past_length = past[0][0].size(-2)
        if position_ids is None:
            position_ids = torch.arange(past_length, input_ids.size(-1) + past_length, dtype=torch.long, device=input_ids.device)
            position_ids = position_ids.unsqueeze(0).expand_as(input_ids)

        input_shape = input_ids.size()
        input_ids = input_ids.view(-1, input_ids.size(-1))
        position_ids = position_ids.view(-1, position_ids.size(-1))

        inputs_embeds = self.wte(input_ids)
        position_embeds = self.wpe(position_ids)
        if token_type_ids is not None:
            token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
            token_type_embeds = self.wte(token_type_ids)
        else:
            token_type_embeds = 0
        hidden_states = inputs_embeds + position_embeds + token_type_embeds
        presents = []
        for block, layer_past in zip(self.h, past):
            hidden_states, present = block(hidden_states, layer_past)
            presents.append(present)
        hidden_states = self.ln_f(hidden_states)
        output_shape = input_shape + (hidden_states.size(-1),)
        return hidden_states.view(*output_shape), presents

class GPT2LMHead(nn.Module):
    def __init__(self, model_embeddings_weights, config):
        super().__init__()
        self.n_embd = config.n_embd
        self.decoder = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.decoder.weight = model_embeddings_weights

    def forward(self, hidden_state):
        lm_logits = self.decoder(hidden_state)
        return lm_logits

class Block(nn.Module):
    def __init__(self, n_ctx, config, scale=False):
        super().__init__()
        nx = config.n_embd
        self.ln_1 = LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.attn = Attention(nx, n_ctx, config, scale)
        self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.mlp = MLP(4 * nx, config)

    def forward(self, x, layer_past=None):
        a, present = self.attn(self.ln_1(x), layer_past=layer_past)
        x = x + a
        m = self.mlp(self.ln_2(x))
        x = x + m
        return x, present

class Attention(nn.Module):
    def __init__(self, nx, n_ctx, config, scale=False):
        super().__init__()
        n_state = nx
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale
        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)

    def _attn(self, q, k, v):
        w = torch.matmul(q, k)
        if self.scale:
            w = w / math.sqrt(v.size(-1))
        nd, ns = w.size(-2), w.size(-1)
        b = self.bias[:, :, ns - nd:ns, :ns]
        # Causal mask: future positions get a large negative score so they vanish after the softmax.
        w = w * b - 1e10 * (1 - b)
        w = nn.Softmax(dim=-1)(w)
        return torch.matmul(w, v)

    def merge_heads(self, x):
        x = x.permute(0, 2, 1, 3).contiguous()
        new_x_shape = x.size()[:-2] + (x.size(-2) * x.size(-1),)
        return x.view(*new_x_shape)

    def split_heads(self, x, k=False):
        new_x_shape = x.size()[:-1] + (self.n_head, x.size(-1) // self.n_head)
        x = x.view(*new_x_shape)
        if k:
            return x.permute(0, 2, 3, 1)
        else:
            return x.permute(0, 2, 1, 3)

    def forward(self, x, layer_past=None):
        x = self.c_attn(x)
        query, key, value = x.split(self.split_size, dim=2)
        query = self.split_heads(query)
        key = self.split_heads(key, k=True)
        value = self.split_heads(value)
        if layer_past is not None:
            past_key, past_value = layer_past[0].transpose(-2, -1), layer_past[1]
            key = torch.cat((past_key, key), dim=-1)
            value = torch.cat((past_value, value), dim=-2)
        present = torch.stack((key.transpose(-2, -1), value))
        a = self._attn(query, key, value)
        a = self.merge_heads(a)
        a = self.c_proj(a)
        return a, present

class MLP(nn.Module):
    def __init__(self, n_state, config):
        super().__init__()
        nx = config.n_embd
        self.c_fc = Conv1D(n_state, nx)
        self.c_proj = Conv1D(nx, n_state)
        self.act = gelu

    def forward(self, x):
        h = self.act(self.c_fc(x))
        h2 = self.c_proj(h)
        return h2

class Conv1D(nn.Module):
    def __init__(self, nf, nx):
        super().__init__()
        self.nf = nf
        w = torch.empty(nx, nf)
        nn.init.normal_(w, std=0.02)
        self.weight = Parameter(w)
        self.bias = Parameter(torch.zeros(nf))

    def forward(self, x):
        size_out = x.size()[:-1] + (self.nf,)
        x = torch.addmm(self.bias, x.view(-1, x.size(-1)), self.weight)
        x = x.view(*size_out)
        return x

class LayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-12):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.bias = nn.Parameter(torch.zeros(hidden_size))
        self.variance_epsilon = eps

    def forward(self, x):
        u = x.mean(-1, keepdim=True)
        s = (x - u).pow(2).mean(-1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.variance_epsilon)
        return self.weight * x + self.bias

def gelu(x):
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
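A minimal standalone sketch (not part of the uploaded files) of the causal-masking step used in Attention._attn above: the lower-triangular bias buffer zeroes out scores for future positions and replaces them with a large negative value, so they contribute practically nothing after the softmax.

import torch
import torch.nn as nn

n_ctx = 4
bias = torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx)  # same causal buffer shape as Attention
scores = torch.randn(1, 1, n_ctx, n_ctx)                              # raw attention scores
scores = scores * bias - 1e10 * (1 - bias)                            # mask future positions
probs = nn.Softmax(dim=-1)(scores)
print(probs[0, 0])                                                    # entries above the diagonal are effectively zero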
image_to_3d_openlrm.py
ADDED
@@ -0,0 +1,31 @@
import torch
import torch.nn as nn
import wget
import json
import os

IMAGE_TO_3D_FOLDER = "./ImageTo3DModel"
IMAGE_TO_3D_MODEL_WEIGHTS = "pytorch_model.bin"
IMAGE_TO_3D_CONFIG = "config.json"
IMAGE_TO_3D_MODEL_URL = "https://huggingface.co/zxhezexin/openlrm-obj-base-1.1/resolve/main/pytorch_model.bin"
IMAGE_TO_3D_CONFIG_URL = "https://huggingface.co/zxhezexin/openlrm-obj-base-1.1/resolve/main/config.json"
IMAGE_TO_3D_FILES_URLS = [
    (IMAGE_TO_3D_MODEL_URL, IMAGE_TO_3D_MODEL_WEIGHTS),
    (IMAGE_TO_3D_CONFIG_URL, IMAGE_TO_3D_CONFIG),
]

def ensure_image_to_3d_files_exist():
    os.makedirs(IMAGE_TO_3D_FOLDER, exist_ok=True)
    for url, filename in IMAGE_TO_3D_FILES_URLS:
        filepath = os.path.join(IMAGE_TO_3D_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class OpenLRM(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits
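A hedged usage sketch for this module: the download helper assumes network access to the Hugging Face URLs above (and the wget package), and the OpenLRM class is only the placeholder linear head defined above, not the real OpenLRM architecture.

import torch
from image_to_3d_openlrm import ensure_image_to_3d_files_exist, OpenLRM

ensure_image_to_3d_files_exist()   # fetches pytorch_model.bin and config.json if they are missing
model = OpenLRM(num_classes=3)     # placeholder head over 100-dimensional features
features = torch.randn(2, 100)     # dummy input batch
print(model(features).shape)       # torch.Size([2, 3])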
imagegen_vae_unet.py
ADDED
@@ -0,0 +1,164 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import wget
import json
import os

IMAGEGEN_FOLDER = "./ImageGenModel"
IMAGEGEN_MODEL_WEIGHTS = "diffusion_pytorch_model.bin"
IMAGEGEN_CONFIG = "config.json"
IMAGEGEN_MODEL_URL = "https://huggingface.co/stabilityai/sd-vae-ft-mse/resolve/main/diffusion_pytorch_model.bin"
IMAGEGEN_CONFIG_URL = "https://huggingface.co/stabilityai/sd-vae-ft-mse/resolve/main/config.json"
IMAGEGEN_FILES_URLS = [
    (IMAGEGEN_MODEL_URL, IMAGEGEN_MODEL_WEIGHTS),
    (IMAGEGEN_CONFIG_URL, IMAGEGEN_CONFIG),
]

def ensure_imagegen_files_exist():
    os.makedirs(IMAGEGEN_FOLDER, exist_ok=True)
    for url, filename in IMAGEGEN_FILES_URLS:
        filepath = os.path.join(IMAGEGEN_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class UNet2DConditionModelConfig:
    def __init__(self, **kwargs):
        self.sample_size = 64
        self.layers_per_block = 2
        self.block_out_channels = [320, 640, 1280, 1280]
        self.downsample = [2, 2, 2, 2]
        self.upsample = [2, 2, 2, 2]
        self.cross_attention_dim = 768
        self.act_fn = "silu"
        self.norm_num_groups = 32
        self.num_attention_heads = 8
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

class UNet2DConditionModel(nn.Module):
    def __init__(self, config: UNet2DConditionModelConfig):
        super().__init__()
        self.conv_in = nn.Conv2d(4, config.block_out_channels[0], kernel_size=3, padding=1)
        self.down_blocks = nn.ModuleList([])
        for i in range(len(config.block_out_channels)):
            is_final_block = i == len(config.block_out_channels) - 1
            downsample_factor = 1 if is_final_block else config.downsample[i]
            out_channels = config.block_out_channels[i]
            layers_per_block = config.layers_per_block
            self.down_blocks.append(DownBlock(out_channels, layers_per_block, downsample_factor))
        self.mid_block = MidBlock(config.block_out_channels[-1])
        self.up_blocks = nn.ModuleList([])
        reversed_block_out_channels = list(reversed(config.block_out_channels))
        reversed_upsample_factors = list(reversed(config.upsample))
        for i in range(len(config.block_out_channels)):
            is_final_block = i == len(config.block_out_channels) - 1
            upsample_factor = 1 if is_final_block else reversed_upsample_factors[i]
            out_channels = reversed_block_out_channels[i]
            layers_per_block = config.layers_per_block
            self.up_blocks.append(UpBlock(out_channels, layers_per_block, upsample_factor))
        self.norm_out = nn.GroupNorm(num_groups=config.norm_num_groups, num_channels=config.block_out_channels[0])
        self.conv_norm_out = nn.Conv2d(config.block_out_channels[0], config.block_out_channels[0], kernel_size=3, padding=1)
        self.conv_out = nn.Conv2d(config.block_out_channels[0], 4, kernel_size=3, padding=1)

    def forward(self, sample: torch.FloatTensor, timestep: torch.IntTensor, encoder_hidden_states: torch.FloatTensor):
        sample = self.conv_in(sample)
        for down_block in self.down_blocks:
            sample = down_block(sample)
        sample = self.mid_block(sample)
        for up_block in self.up_blocks:
            sample = up_block(sample)
        sample = self.norm_out(sample)
        sample = F.silu(sample)
        sample = self.conv_norm_out(sample)
        sample = F.silu(sample)
        sample = self.conv_out(sample)
        return {"sample": sample}

class DownBlock(nn.Module):
    def __init__(self, out_channels, layers_per_block, downsample_factor):
        super().__init__()
        self.layers = nn.ModuleList([ResnetBlock(out_channels) for _ in range(layers_per_block)])
        if downsample_factor > 1:
            self.downsample = Downsample2D(out_channels, downsample_factor)
        else:
            self.downsample = nn.Identity()

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.downsample(x)
        return x

class UpBlock(nn.Module):
    def __init__(self, out_channels, layers_per_block, upsample_factor):
        super().__init__()
        self.layers = nn.ModuleList([ResnetBlock(out_channels) for _ in range(layers_per_block)])
        if upsample_factor > 1:
            self.upsample = Upsample2D(out_channels, upsample_factor)
        else:
            self.upsample = nn.Identity()

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.upsample(x)
        return x

class ResnetBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.norm1 = nn.GroupNorm(num_groups=32, num_channels=channels)
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.norm2 = nn.GroupNorm(num_groups=32, num_channels=channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.residual_conv = nn.Conv2d(channels, channels, kernel_size=1)

    def forward(self, x):
        residual = x
        x = self.norm1(x)
        x = F.silu(x)
        x = self.conv1(x)
        x = self.norm2(x)
        x = F.silu(x)
        x = self.conv2(x)
        return x + self.residual_conv(residual)

class MidBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.norm1 = nn.GroupNorm(num_groups=32, num_channels=channels)
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.norm2 = nn.GroupNorm(num_groups=32, num_channels=channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, x):
        x = self.norm1(x)
        x = F.silu(x)
        x = self.conv1(x)
        x = self.norm2(x)
        x = F.silu(x)
        x = self.conv2(x)
        return x

class Downsample2D(nn.Module):
    def __init__(self, channels, factor):
        super().__init__()
        self.factor = factor
        self.conv = nn.Conv2d(channels, channels, kernel_size=3, stride=factor, padding=1)

    def forward(self, x):
        return self.conv(x)

class Upsample2D(nn.Module):
    def __init__(self, channels, factor):
        super().__init__()
        self.factor = factor
        self.conv = nn.ConvTranspose2d(channels, channels, kernel_size=factor, stride=factor)

    def forward(self, x):
        return self.conv(x)
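An illustrative shape check (assumed usage, not from the uploaded files) for the building blocks above: ResnetBlock preserves the channel count, Downsample2D halves the spatial size when factor=2, and Upsample2D restores it. It deliberately does not run the full UNet2DConditionModel forward pass.

import torch
from imagegen_vae_unet import ResnetBlock, Downsample2D, Upsample2D

x = torch.randn(1, 320, 64, 64)      # a batch of latent feature maps
block = ResnetBlock(320)
down = Downsample2D(320, factor=2)
up = Upsample2D(320, factor=2)
y = down(block(x))
print(y.shape)                        # torch.Size([1, 320, 32, 32])
print(up(y).shape)                    # torch.Size([1, 320, 64, 64])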
lipsync_wav2lip.py
ADDED
@@ -0,0 +1,57 @@
import torch
import torch.nn as nn
import wget
import os

LIPSYNC_FOLDER = "./LipSyncModel"
LIPSYNC_MODEL_WEIGHTS = "lipsync_expert.pth"
LIPSYNC_MODEL_WEIGHTS_URL = "https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?SourceUrl=%2Fpersonal%2Fradrabha%5Fm%5Fresearch%5Fiiit%5Fac%5Fin%2FDocuments%2FWav2Lip%5FModels%2Flipsync%5Fexpert%2Epth"
LIPSYNC_FILES_URLS = [
    (LIPSYNC_MODEL_WEIGHTS_URL, LIPSYNC_MODEL_WEIGHTS),
]

WAV2LIP_FOLDER = "./Wav2LipModel"
WAV2LIP_MODEL_WEIGHTS = "wav2lip_gan.pth"
WAV2LIP_MODEL_WEIGHTS_URL = "https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?SourceUrl=%2Fpersonal%2Fradrabha%5Fm%5Fresearch%5Fiiit%5Fac%5Fin%2FDocuments%2FWav2Lip%5FModels%2Fwav2lip%5Fgan%2Epth"
WAV2LIP_FILES_URLS = [
    (WAV2LIP_MODEL_WEIGHTS_URL, WAV2LIP_MODEL_WEIGHTS),
]

def ensure_lipsync_files_exist():
    os.makedirs(LIPSYNC_FOLDER, exist_ok=True)
    for url, filename in LIPSYNC_FILES_URLS:
        filepath = os.path.join(LIPSYNC_FOLDER, filename)
        if not os.path.exists(filepath):
            try:
                wget.download(url, out=filepath)
            except Exception as e:
                print(f"Warning: Download for {filename} failed, likely due to link restrictions. You may need to download it manually.")

def ensure_wav2lip_files_exist():
    os.makedirs(WAV2LIP_FOLDER, exist_ok=True)
    for url, filename in WAV2LIP_FILES_URLS:
        filepath = os.path.join(WAV2LIP_FOLDER, filename)
        if not os.path.exists(filepath):
            try:
                wget.download(url, out=filepath)
            except Exception as e:
                print(f"Warning: Download for {filename} failed, likely due to link restrictions. You may need to download it manually.")


class LipSyncModel(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits

class Wav2LipModel(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits
musicgen_torch.py
ADDED
@@ -0,0 +1,36 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import wget
import json
import os

MUSICGEN_FOLDER = "./MusicGenModel"
MUSICGEN_MODEL_NAME = "melody"
MUSICGEN_MODEL_WEIGHTS = "pytorch_model.bin"
MUSICGEN_CONFIG = "config.json"
MUSICGEN_SAMPLE_RATE = 32000
MUSICGEN_DURATION = 8
MUSICGEN_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/musicgen-small/resolve/main/pytorch_model.bin"
MUSICGEN_CONFIG_URL = "https://huggingface.co/facebook/musicgen-small/resolve/main/config.json"
MUSICGEN_FILES_URLS = [
    (MUSICGEN_MODEL_WEIGHTS_URL, MUSICGEN_MODEL_WEIGHTS),
    (MUSICGEN_CONFIG_URL, MUSICGEN_CONFIG),
]

def ensure_musicgen_files_exist():
    os.makedirs(MUSICGEN_FOLDER, exist_ok=True)
    for url, filename in MUSICGEN_FILES_URLS:
        filepath = os.path.join(MUSICGEN_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class MusicGenModel(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits
sentiment_roberta.py
ADDED
@@ -0,0 +1,195 @@
import torch
import torch.nn as nn
import torch.nn.functional as F  # provides the GELU activation used by RobertaIntermediate
import math  # used for the attention score scaling
import wget
import json
import os

SENTIMENT_FOLDER = "./SentimentModel"
SENTIMENT_MODEL_WEIGHTS = "pytorch_model.bin"
SENTIMENT_VOCAB = "sentiment_vocab.json"
SENTIMENT_CONFIG = "config.json"
SENTIMENT_MODEL_WEIGHTS_URL = "https://huggingface.co/cardiffnlp/distilroberta-base-sentiment/resolve/main/pytorch_model.bin"
SENTIMENT_VOCAB_URL = "https://huggingface.co/cardiffnlp/distilroberta-base-sentiment/resolve/main/vocab.json"
SENTIMENT_CONFIG_URL = "https://huggingface.co/cardiffnlp/distilroberta-base-sentiment/resolve/main/config.json"
SENTIMENT_FILES_URLS = [
    (SENTIMENT_MODEL_WEIGHTS_URL, SENTIMENT_MODEL_WEIGHTS),
    (SENTIMENT_VOCAB_URL, SENTIMENT_VOCAB),
    (SENTIMENT_CONFIG_URL, SENTIMENT_CONFIG),
]

def ensure_sentiment_files_exist():
    os.makedirs(SENTIMENT_FOLDER, exist_ok=True)
    for url, filename in SENTIMENT_FILES_URLS:
        filepath = os.path.join(SENTIMENT_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class RobertaForSequenceClassification(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.dense = nn.Linear(768, 768)
        self.dropout = nn.Dropout(0.1)
        self.out_proj = nn.Linear(768, num_labels)

    def forward(self, sequence_output):
        x = sequence_output[:, 0, :]
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x

class RobertaModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embeddings = RobertaEmbeddings(config)
        self.encoder = RobertaEncoder(config)

    def forward(self, input_ids, attention_mask=None):
        embedding_output = self.embeddings(input_ids)
        encoder_outputs = self.encoder(embedding_output, attention_mask=attention_mask)
        return (encoder_outputs[0], )

class RobertaEmbeddings(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.position_ids = torch.arange(config.max_position_embeddings).expand((1, -1))

    def forward(self, input_ids, token_type_ids=None, position_ids=None):
        input_shape = input_ids.size()
        seq_length = input_shape[1]
        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=input_ids.device)

        embeddings = self.word_embeddings(input_ids) + self.position_embeddings(position_ids) + self.token_type_embeddings(token_type_ids)
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

class RobertaEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.layer = nn.ModuleList([RobertaLayer(config) for _ in range(config.num_hidden_layers)])

    def forward(self, hidden_states, attention_mask=None):
        all_encoder_layers = []
        for layer_module in self.layer:
            hidden_states = layer_module(hidden_states, attention_mask=attention_mask)
            all_encoder_layers.append(hidden_states)
        return (hidden_states, all_encoder_layers)

class RobertaLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.attention = RobertaAttention(config)
        self.intermediate = RobertaIntermediate(config)
        self.output = RobertaOutput(config)

    def forward(self, hidden_states, attention_mask=None):
        attention_output = self.attention(hidden_states, attention_mask=attention_mask)
        intermediate_output = self.intermediate(attention_output)
        layer_output = self.output(intermediate_output, attention_output)
        return layer_output

class RobertaAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self_attn = RobertaSelfAttention(config)
        self.output = RobertaSelfOutput(config)

    def forward(self, hidden_states, attention_mask=None):
        self_output = self.self_attn(hidden_states, attention_mask=attention_mask)
        attention_output = self.output(self_output, hidden_states)
        return attention_output

class RobertaSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0:
            raise ValueError(
                f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
                f"heads ({config.num_attention_heads})"
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(config.hidden_size, self.all_head_size)
        self.value = nn.Linear(config.hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(self, hidden_states, attention_mask=None):
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            attention_scores = attention_scores + attention_mask

        attention_probs = nn.Softmax(dim=-1)(attention_scores)
        attention_probs = self.dropout(attention_probs)

        context_layer = torch.matmul(attention_probs, value_layer)
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)
        return context_layer

class RobertaSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)  # project the attention output back to the hidden size
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states

class RobertaIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        self.intermediate_act_fn = F.gelu

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states

class RobertaOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states
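A hedged sketch of how the classification head above is meant to be driven: it pools the first token of a (batch, seq_len, 768) hidden-state tensor and projects it to label logits. Random tensors stand in for real RoBERTa encoder activations.

import torch
from sentiment_roberta import RobertaForSequenceClassification

head = RobertaForSequenceClassification(num_labels=3)   # e.g. negative / neutral / positive
sequence_output = torch.randn(2, 16, 768)               # dummy encoder output: (batch, seq_len, hidden)
logits = head(sequence_output)
print(logits.shape)                                      # torch.Size([2, 3])
print(logits.softmax(dim=-1))                            # per-class probabilities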
stt_wav2vec2.py
ADDED
@@ -0,0 +1,46 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import wget
import json
import os

STT_FOLDER = "./STTModel"
STT_MODEL_NAME = "wav2vec2"
STT_MODEL_WEIGHTS = "pytorch_model.bin"
STT_CONFIG = "config.json"
STT_VOCAB = "vocab.json"
STT_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/pytorch_model.bin"
STT_CONFIG_URL = "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json"
STT_VOCAB_URL = "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/vocab.json"
STT_FILES_URLS = [
    (STT_MODEL_WEIGHTS_URL, STT_MODEL_WEIGHTS),
    (STT_CONFIG_URL, STT_CONFIG),
    (STT_VOCAB_URL, STT_VOCAB),
]

def ensure_stt_files_exist():
    os.makedirs(STT_FOLDER, exist_ok=True)
    for url, filename in STT_FILES_URLS:
        filepath = os.path.join(STT_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class Wav2Vec2ForCTC(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=5, stride=2, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=2, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc = nn.Linear(32 * 39 * 40, num_classes)  # Adjusted input size

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        logits = self.fc(x)
        return logits
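A shape-check sketch for the convolutional stack above (illustrative only): the flattened activation reaching self.fc must equal its hard-coded in_features of 32 * 39 * 40 = 49920, and one waveform length that satisfies the conv/pool arithmetic is 24960 samples.

import torch
from stt_wav2vec2 import Wav2Vec2ForCTC

model = Wav2Vec2ForCTC(num_classes=29)   # e.g. characters plus a CTC blank
waveform = torch.randn(1, 1, 24960)      # (batch, channels, samples); length chosen to match the fc size
print(model(waveform).shape)             # torch.Size([1, 29])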
summarization_bart.py
ADDED
@@ -0,0 +1,34 @@
import torch
import torch.nn as nn
import wget
import json
import os

SUMMARIZATION_FOLDER = "./SummarizationModel"
SUMMARIZATION_MODEL_WEIGHTS = "pytorch_model.bin"
SUMMARIZATION_CONFIG = "config.json"
SUMMARIZATION_VOCAB = "vocab.json"
SUMMARIZATION_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/bart-large-cnn/resolve/main/pytorch_model.bin"
SUMMARIZATION_CONFIG_URL = "https://huggingface.co/facebook/bart-large-cnn/resolve/main/config.json"
SUMMARIZATION_VOCAB_URL = "https://huggingface.co/facebook/bart-large-cnn/resolve/main/vocab.json"
SUMMARIZATION_FILES_URLS = [
    (SUMMARIZATION_MODEL_WEIGHTS_URL, SUMMARIZATION_MODEL_WEIGHTS),
    (SUMMARIZATION_CONFIG_URL, SUMMARIZATION_CONFIG),
    (SUMMARIZATION_VOCAB_URL, SUMMARIZATION_VOCAB),
]

def ensure_summarization_files_exist():
    os.makedirs(SUMMARIZATION_FOLDER, exist_ok=True)
    for url, filename in SUMMARIZATION_FILES_URLS:
        filepath = os.path.join(SUMMARIZATION_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class BartForConditionalGeneration(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits
text_to_video_clip4clip.py
ADDED
@@ -0,0 +1,34 @@
import torch
import torch.nn as nn
import wget
import json
import os

TEXT_TO_VIDEO_FOLDER = "./TextToVideoModel"
TEXT_TO_VIDEO_MODEL_WEIGHTS = "pytorch_model.bin"
TEXT_TO_VIDEO_CONFIG = "config.json"
TEXT_TO_VIDEO_VOCAB = "vocab.json"
TEXT_TO_VIDEO_MODEL_WEIGHTS_URL = "https://huggingface.co/Searchium-ai/clip4clip-webvid150k/resolve/main/pytorch_model.bin"
TEXT_TO_VIDEO_CONFIG_URL = "https://huggingface.co/Searchium-ai/clip4clip-webvid150k/resolve/main/config.json"
TEXT_TO_VIDEO_VOCAB_URL = "https://huggingface.co/Searchium-ai/clip4clip-webvid150k/resolve/main/vocab.json"
TEXT_TO_VIDEO_FILES_URLS = [
    (TEXT_TO_VIDEO_MODEL_WEIGHTS_URL, TEXT_TO_VIDEO_MODEL_WEIGHTS),
    (TEXT_TO_VIDEO_CONFIG_URL, TEXT_TO_VIDEO_CONFIG),
    (TEXT_TO_VIDEO_VOCAB_URL, TEXT_TO_VIDEO_VOCAB),
]

def ensure_text_to_video_files_exist():
    os.makedirs(TEXT_TO_VIDEO_FOLDER, exist_ok=True)
    for url, filename in TEXT_TO_VIDEO_FILES_URLS:
        filepath = os.path.join(TEXT_TO_VIDEO_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class Clip4ClipModel(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.fc = nn.Linear(100, num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits
translation_mbart.py
ADDED
@@ -0,0 +1,267 @@
import torch
import torch.nn as nn
import torch.nn.functional as F  # used for F.relu and F.pad below
import math  # used by the sinusoidal positional embedding
import wget
import json
import os
import sentencepiece as spm
import re

TRANSLATION_FOLDER = "./TranslationModel"
TRANSLATION_MODEL_WEIGHTS_FILE = "pytorch_model.bin"
TRANSLATION_MODEL_CONFIG_FILE = "config.json"
TRANSLATION_MODEL_VOCAB_FILE = "sentencepiece.bpe.model"
TRANSLATION_MODEL_WEIGHTS_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/pytorch_model.bin"
TRANSLATION_MODEL_CONFIG_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/config.json"
TRANSLATION_MODEL_VOCAB_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/sentencepiece.bpe.model"
TRANSLATION_MODEL_FILES_URLS = [
    (TRANSLATION_MODEL_WEIGHTS_URL, TRANSLATION_MODEL_WEIGHTS_FILE),
    (TRANSLATION_MODEL_CONFIG_URL, TRANSLATION_MODEL_CONFIG_FILE),
    (TRANSLATION_MODEL_VOCAB_URL, TRANSLATION_MODEL_VOCAB_FILE),
]
TRANSLATION_SPM_URL = "https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt/resolve/main/sentencepiece.bpe.model"
TRANSLATION_SPM = "sentencepiece.bpe.model"

def ensure_translation_files_exist():
    os.makedirs(TRANSLATION_FOLDER, exist_ok=True)
    for url, filename in TRANSLATION_MODEL_FILES_URLS:
        filepath = os.path.join(TRANSLATION_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)
    filepath_spm = os.path.join(TRANSLATION_FOLDER, TRANSLATION_SPM)
    if not os.path.exists(filepath_spm):
        wget.download(TRANSLATION_SPM_URL, out=filepath_spm)

class MBartConfig:
    def __init__(self, vocab_size, hidden_size=1024, num_hidden_layers=12, num_attention_heads=16, intermediate_size=4096, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, layer_norm_eps=1e-05, initializer_range=0.02, pad_token_id=1, bos_token_id=0, eos_token_id=2, n_positions=1024, n_ctx=1024, decoder_layers=12, decoder_attention_heads=16, decoder_ffn_dim=4096, encoder_layers=12, encoder_attention_heads=16, encoder_ffn_dim=4096, **kwargs):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.layer_norm_eps = layer_norm_eps
        self.initializer_range = initializer_range
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.n_positions = n_positions
        self.n_ctx = n_ctx
        self.decoder_layers = decoder_layers
        self.decoder_attention_heads = decoder_attention_heads
        self.decoder_ffn_dim = decoder_ffn_dim
        self.encoder_layers = encoder_layers
        self.encoder_attention_heads = encoder_attention_heads
        self.encoder_ffn_dim = encoder_ffn_dim
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

class MBartForConditionalGeneration(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.model = MBartModel(config)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
        self.final_logits_bias = nn.Parameter(torch.zeros((1, config.vocab_size)))

    def forward(self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None):
        outputs = self.model(input_ids, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask)
        lm_logits = self.lm_head(outputs[0]) + self.final_logits_bias
        return lm_logits

class MBartModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder = MBartEncoder(config)
        self.decoder = MBartDecoder(config)

    def forward(self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None):
        encoder_outputs = self.encoder(input_ids, attention_mask=attention_mask)
        decoder_outputs = self.decoder(decoder_input_ids, encoder_outputs=encoder_outputs, decoder_attention_mask=decoder_attention_mask)
        return decoder_outputs

class MBartEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.embed_positions = MBartSinusoidalPositionalEmbedding(config.hidden_size, config.pad_token_id)
        self.layers = nn.ModuleList([MBartEncoderLayer(config) for _ in range(config.encoder_layers)])
        self.layernorm_embedding = nn.LayerNorm(config.hidden_size)

    def forward(self, input_ids, attention_mask=None):
        inputs_embeds = self.embed_tokens(input_ids)
        position_embeddings = self.embed_positions(input_ids)
        embeddings = inputs_embeds + position_embeddings
        embeddings = self.layernorm_embedding(embeddings)
        encoder_states = embeddings
        all_encoder_layers = []
        for layer_module in self.layers:
            encoder_states = layer_module(encoder_states, encoder_padding_mask=attention_mask)
            all_encoder_layers.append(encoder_states)
        return (encoder_states, all_encoder_layers)

class MBartDecoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.embed_positions = MBartSinusoidalPositionalEmbedding(config.hidden_size, config.pad_token_id)
        self.layers = nn.ModuleList([MBartDecoderLayer(config) for _ in range(config.decoder_layers)])
        self.layernorm_embedding = nn.LayerNorm(config.hidden_size)

    def forward(self, decoder_input_ids, encoder_outputs, decoder_attention_mask=None):
        inputs_embeds = self.embed_tokens(decoder_input_ids)
        position_embeddings = self.embed_positions(decoder_input_ids)
        embeddings = inputs_embeds + position_embeddings
        embeddings = self.layernorm_embedding(embeddings)
        decoder_states = embeddings
        all_decoder_layers = []
        all_cross_attention_layers = []
        for layer_module in self.layers:
            decoder_states, cross_attn_weights = layer_module(decoder_states, encoder_outputs[0], decoder_padding_mask=decoder_attention_mask, encoder_padding_mask=encoder_outputs[0])
            all_decoder_layers.append(decoder_states)
            all_cross_attention_layers.append(cross_attn_weights)
        return (decoder_states, all_decoder_layers, all_cross_attention_layers)

class MBartSinusoidalPositionalEmbedding(nn.Module):
    def __init__(self, embedding_dim, padding_idx):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx

    def forward(self, input_ids):
        seq_len = input_ids.size(1)
        positions = torch.arange(self.padding_idx + 1, seq_len + self.padding_idx + 1, dtype=torch.long, device=input_ids.device)
        return self.get_embedding(positions)

    def get_embedding(self, positions):
        half_dim = self.embedding_dim // 2
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=torch.float, device=positions.device) * -emb)
        emb = torch.outer(positions.float(), emb)
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
        if self.embedding_dim % 2 == 1:
            emb = F.pad(emb, (0, 1, 0, 0))
        return emb

class MBartEncoderLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self_attn = MBartAttention(config, embed_dim=config.hidden_size, num_heads=config.encoder_attention_heads)
        self.self_attn_layer_norm = nn.LayerNorm(config.hidden_size)
        self.fc1 = nn.Linear(config.hidden_size, config.encoder_ffn_dim)
        self.fc2 = nn.Linear(config.encoder_ffn_dim, config.hidden_size)
        self.final_layer_norm = nn.LayerNorm(config.hidden_size)

    def forward(self, hidden_states, encoder_padding_mask=None):
        residual = hidden_states
        hidden_states = self.self_attn_layer_norm(hidden_states)
        hidden_states, _ = self.self_attn(hidden_states, hidden_states, hidden_states, attention_mask=encoder_padding_mask)
        hidden_states = residual + hidden_states
        residual = hidden_states
        hidden_states = self.final_layer_norm(hidden_states)
        hidden_states = self.fc2(F.relu(self.fc1(hidden_states)))
        hidden_states = residual + hidden_states
        return hidden_states

class MBartDecoderLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self_attn = MBartAttention(config, embed_dim=config.hidden_size, num_heads=config.decoder_attention_heads)
        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
        self.self_attn_layer_norm = nn.LayerNorm(config.hidden_size)
        self.encoder_attn = MBartAttention(config, embed_dim=config.hidden_size, num_heads=config.decoder_attention_heads)
        self.encoder_attn_layer_norm = nn.LayerNorm(config.hidden_size)
        self.fc1 = nn.Linear(config.hidden_size, config.decoder_ffn_dim)
        self.fc2 = nn.Linear(config.decoder_ffn_dim, config.hidden_size)
        self.final_layer_norm = nn.LayerNorm(config.hidden_size)

    def forward(self, hidden_states, encoder_hidden_states, decoder_padding_mask=None, encoder_padding_mask=None):
        residual = hidden_states
        hidden_states = self.self_attn_layer_norm(hidden_states)
        hidden_states, _ = self.self_attn(hidden_states, hidden_states, hidden_states, attention_mask=decoder_padding_mask)
        hidden_states = residual + hidden_states
        residual = hidden_states
        hidden_states = self.encoder_attn_layer_norm(hidden_states)
        hidden_states, cross_attn_weights = self.encoder_attn(hidden_states, encoder_hidden_states, encoder_hidden_states, attention_mask=encoder_padding_mask)
        hidden_states = residual + hidden_states
        residual = hidden_states
        hidden_states = self.final_layer_norm(hidden_states)
        hidden_states = self.fc2(F.relu(self.fc1(hidden_states)))
        hidden_states = residual + hidden_states
        return hidden_states, cross_attn_weights

class MBartAttention(nn.Module):
    def __init__(self, config, embed_dim, num_heads):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.scaling = self.head_dim ** -0.5
        self.k_proj = nn.Linear(embed_dim, embed_dim)
        self.v_proj = nn.Linear(embed_dim, embed_dim)
        self.q_proj = nn.Linear(embed_dim, embed_dim)
        self.out_proj = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def _shape(self, tensor, seq_len, bsz):
        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

    def forward(self, query, key, value, attention_mask=None):
        bsz, tgt_len, _ = query.size()
        bsz, src_len, _ = key.size()
        query = self.q_proj(query)
        key = self.k_proj(key)
        value = self.v_proj(value)
        query = self._shape(query, tgt_len, bsz)
        key = self._shape(key, src_len, bsz)
        value = self._shape(value, src_len, bsz)
        attn_weights = torch.matmul(query, key.transpose(-1, -2)) * self.scaling

        if attention_mask is not None:
            attention_mask = attention_mask.float().masked_fill(attention_mask == 0, float('-inf')).masked_fill(attention_mask == 1, float(0.0))
            attn_weights = attn_weights + attention_mask

        attn_weights = nn.Softmax(dim=-1)(attn_weights)
        attn_weights = self.dropout(attn_weights)
        attn_output = torch.matmul(attn_weights, value)
        attn_output = attn_output.transpose(1, 2).contiguous().view(bsz, tgt_len, self.embed_dim)
        attn_output = self.out_proj(attn_output)
        return attn_output, attn_weights

class MBartTokenizer:
    def __init__(self, sentencepiece_processor):
        self.sp = sentencepiece_processor
        self.pad_token = "<pad>"
        self.bos_token = "<s>"
        self.eos_token = "</s>"
        self.pad_token_id = 1
        self.bos_token_id = 0
        self.eos_token_id = 2
        self.model_max_length = 1024

    def __call__(self, text, return_tensors="pt", padding=True, truncation=True, max_length=None, src_lang="en_XX", tgt_lang="es_XX", **kwargs):
        max_length = max_length if max_length is not None else self.model_max_length
        self.sp.SetEncodeExtraOptions("bos:eos")  # prepend <s> and append </s> when encoding
        input_ids = self.sp.EncodeAsIds(f"{src_lang} {text}")
        if truncation and len(input_ids) > max_length:
            input_ids = input_ids[:max_length]
        if padding:
            input_ids += [self.pad_token_id] * (max_length - len(input_ids))
        if return_tensors == "pt":
            return {"input_ids": torch.tensor([input_ids]), "attention_mask": torch.ones(len(input_ids)).unsqueeze(0)}
        return input_ids

    def batch_decode(self, token_ids, skip_special_tokens=False, clean_up_tokenization_spaces=True, target_lang="es_XX"):
        decoded_texts = []
        for ids in token_ids:
            text = self.sp.DecodeIds(list(ids))
            if skip_special_tokens:
                text = re.sub(r'(<s>|</s>|<pad>)', '', text).strip()
            if clean_up_tokenization_spaces:
                text = text.replace('  ', ' ').strip()
            decoded_texts.append(text.replace(f"{target_lang} ", ""))
        return decoded_texts
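A hedged sketch that exercises only the MBartAttention module above on random tensors, using the MBartConfig defaults (hidden_size=1024, 16 heads) and no padding mask; it is not a full translation pass.

import torch
from translation_mbart import MBartConfig, MBartAttention

config = MBartConfig(vocab_size=250054)
attn = MBartAttention(config, embed_dim=config.hidden_size, num_heads=config.encoder_attention_heads)
hidden = torch.randn(2, 7, config.hidden_size)   # (batch, seq_len, hidden)
out, weights = attn(hidden, hidden, hidden)      # self-attention, no mask
print(out.shape)                                  # torch.Size([2, 7, 1024])
print(weights.shape)                              # torch.Size([2, 16, 7, 7])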
tts_vits.py
ADDED
@@ -0,0 +1,57 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import wget
import json
import os

TTS_FOLDER = "./TTSModel"
TTS_MODEL_NAME = "vits"
TTS_MODEL_CONFIG = "config.json"
TTS_MODEL_WEIGHTS = "pytorch_model.bin"
TTS_VOCAB = "vocab.json"
TTS_CONFIG_URL = "https://huggingface.co/kakao-enterprise/vits-vctk/resolve/main/config.json"
TTS_MODEL_WEIGHTS_URL = "https://huggingface.co/kakao-enterprise/vits-vctk/resolve/main/pytorch_model.bin"
TTS_VOCAB_URL = "https://huggingface.co/kakao-enterprise/vits-vctk/resolve/main/vocab.json"
TTS_FILES_URLS = [
    (TTS_CONFIG_URL, TTS_MODEL_CONFIG),
    (TTS_MODEL_WEIGHTS_URL, TTS_MODEL_WEIGHTS),
    (TTS_VOCAB_URL, TTS_VOCAB),
]

def ensure_tts_files_exist():
    os.makedirs(TTS_FOLDER, exist_ok=True)
    for url, filename in TTS_FILES_URLS:
        filepath = os.path.join(TTS_FOLDER, filename)
        if not os.path.exists(filepath):
            wget.download(url, out=filepath)

class VITS(nn.Module):
    def __init__(self, spec_channels, segment_size, num_speakers, num_languages, num_symbols):
        super().__init__()
        self.spec_channels = spec_channels
        self.segment_size = segment_size
        self.num_speakers = num_speakers
        self.num_languages = num_languages
        self.num_symbols = num_symbols
        self.embedding = nn.Embedding(num_symbols, 192)
        self.decoder = Generator(spec_channels)

    def forward(self, text):
        x = self.embedding(text)
        audio = self.decoder(x)
        return audio

class Generator(nn.Module):
    def __init__(self, spec_channels):
        super().__init__()
        self.spec_channels = spec_channels
        self.initial_conv = nn.ConvTranspose2d(192, spec_channels, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
        self.final_conv = nn.Conv2d(spec_channels, 1, kernel_size=(7, 7), padding=(3, 3))

    def forward(self, encoder_outputs):
        x = encoder_outputs.transpose(1, 2).unsqueeze(-1)  # (batch, embed, seq, 1): embedding axis becomes the conv channel axis
        x = self.initial_conv(x)
        x = self.final_conv(x)
        return x.squeeze(1)
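A hedged usage sketch for the placeholder VITS above with toy sizes (the numbers here are arbitrary assumptions, not the real vits-vctk configuration); it simply pushes a batch of symbol ids through the embedding and the small Generator.

import torch
from tts_vits import VITS

model = VITS(spec_channels=80, segment_size=8192, num_speakers=109, num_languages=1, num_symbols=256)
tokens = torch.randint(0, 256, (1, 50))   # a dummy sequence of 50 symbol ids
audio = model(tokens)
print(audio.shape)                         # torch.Size([1, 100, 2]) with these toy sizes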