# Hugging Face Spaces app (page status when captured: Sleeping).
import PIL
import streamlit as st
import torch
from PIL import Image
from transformers import AutoConfig, AutoModel, AutoTokenizer
from transformers import RobertaTokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
def set_page_config():
    """Apply the app's page title, page icon and wide layout through Streamlit."""
    page_settings = {
        'page_title': 'Caption an Cartoon Image',
        'page_icon': ':camera:',
        'layout': 'wide',
    }
    st.set_page_config(**page_settings)
@st.cache_resource
def initialize_model():
    """Load the captioning model, tokenizer and image processor.

    The result is cached with ``st.cache_resource``: Streamlit re-executes
    the script on every widget interaction, and without the cache each
    click on the caption button would instantiate all three objects again.

    Returns:
        A ``(image_processor, model, tokenizer, device)`` tuple, in that
        order. ``device`` is the string ``'cpu'``; this function does not
        itself move the model anywhere.
    """
    device = 'cpu'
    checkpoint = "sourabhbargi11/Caption_generator_model"
    config = AutoConfig.from_pretrained(checkpoint)
    model = VisionEncoderDecoderModel.from_pretrained(checkpoint, config=config)
    tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
    image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
    return image_processor, model, tokenizer, device
def upload_image():
    """Render the sidebar upload widget and return its current value.

    Returns:
        Whatever ``st.sidebar.file_uploader`` returns for a widget that
        accepts ``jpg``, ``jpeg`` and ``png`` files.
    """
    prompt = "Upload an image (we aren't storing anything)"
    accepted_extensions = ["jpg", "jpeg", "png"]
    return st.sidebar.file_uploader(prompt, type=accepted_extensions)
def image_preprocess(image):
    """Return *image* as a 224x224 RGB image.

    Args:
        image: An object exposing the PIL ``Image`` interface used here
            (``mode``, ``convert`` and ``resize``).

    Returns:
        The image converted to mode ``"RGB"`` (when it was not already)
        and resized to 224x224.
    """
    # Previously only grayscale ("L") input was converted, so RGBA / palette /
    # LA uploads (common for PNG) were passed on in their original mode.
    # Converting every non-RGB mode keeps "L" and "RGB" results unchanged
    # while giving the downstream processor a consistent 3-channel image.
    if image.mode != "RGB":
        image = image.convert("RGB")
    return image.resize((224, 224))
def generate_caption(image_processor, model, tokenizer, device, image):
    """Generate a text caption for *image*.

    Args:
        image_processor: Callable invoked as
            ``image_processor(image, return_tensors='pt')``; its result is
            unpacked as keyword arguments for ``model.generate``.
        model: Object providing ``generate(**inputs)``; the first element of
            its output is decoded.
        tokenizer: Object providing ``decode(ids, skip_special_tokens=True)``.
        device: Accepted for interface compatibility; not used in this function.
        image: The image handed to ``image_processor``.

    Returns:
        The value returned by ``tokenizer.decode`` for the first generated
        sequence.
    """
    model_inputs = image_processor(image, return_tensors='pt')
    generated_ids = model.generate(**model_inputs)
    first_sequence = generated_ids[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
def main():
    """Run the Streamlit page: upload an image, preview it, caption it on demand.

    Flow: configure the page, show the header, read the sidebar upload,
    preprocess and display the image, then generate and show a caption when
    the sidebar button is pressed. An upload that cannot be decoded is
    reported with ``st.error`` instead of raising.
    """
    set_page_config()
    st.header("Caption an Image :camera:")

    uploaded_image = upload_image()
    if uploaded_image is None:
        return

    # PIL decodes lazily, so a corrupt file may fail either in ``open`` or
    # later while resizing; both steps are guarded. ``UnidentifiedImageError``
    # is a subclass of ``OSError``.
    try:
        image = image_preprocess(Image.open(uploaded_image))
    except OSError:
        st.error("Could not read that file as an image. Please upload a valid JPG or PNG.")
        return

    st.image(image, caption='Your image')

    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file whose indentation had been lost; confirm the caption is meant to be
    # rendered inside the sidebar.
    with st.sidebar:
        st.divider()
        if st.sidebar.button('Generate Caption'):
            with st.spinner('Generating caption...'):
                image_processor, model, tokenizer, device = initialize_model()
                caption = generate_caption(image_processor, model, tokenizer, device, image)
            st.header("Caption:")
            st.markdown(f'**{caption}**')
if __name__ == '__main__':
    main()
    # Footer rendered after the app body. It lives under the guard so that
    # importing this module does not emit Streamlit output.
    # NOTE(review): the trailing emoji were mis-encoded in the copy this was
    # recovered from. The second decodes to the artist-palette emoji; the
    # first could not be recovered and the laughing face is a best guess.
    st.markdown(
        "\n---\n"
        "You are looking at a partially tuned model. Judge me! "
        "(I am Funny and Creative) 😂🎨"
    )