# Spaces: Runtime error — hosting-page status banner captured along with this file; it is not part of the program.
import io

import fitz  # PyMuPDF
import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the model and tokenizer from Hugging Face
model_name = "KevSun/Engessay_grading_ML"


@st.cache_resource
def _load_model_and_tokenizer(name):
    """Load the sequence-classification model and its tokenizer once.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the loaded objects as a shared resource keeps the
    weights in memory instead of reloading them on each rerun.

    Args:
        name: Hugging Face Hub identifier of the model to load.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


# Module-level names used by the rest of the script.
model, tokenizer = _load_model_and_tokenizer(model_name)
# Streamlit app: page header, PDF upload, and text extraction.
st.title("Automated Scoring App")
# NOTE(review): this prompt still says "Enter your English essay below" although
# the input is now a PDF upload — confirm whether the wording should change.
st.write("Enter your English essay below to predict scores from multiple dimensions:")

# Replace text input with file uploader
uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])

# Defined up front so later code can always read it, even with no upload.
text_content = ""

if uploaded_file:
    # getvalue() returns the whole buffer regardless of the current read
    # position, so a script rerun (e.g. after pressing a button) extracts the
    # same bytes as the first run.
    pdf_bytes = uploaded_file.getvalue()

    # Open the PDF from memory and concatenate the text of every page.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        text_content = "".join(page.get_text() for page in doc)

    # Display the extracted text (read-only) so the user can check it.
    st.write("Extracted text from PDF:")
    st.text_area("PDF Content", text_content, height=200, disabled=True)
# Predict handler: tokenize the extracted essay text, run the model, rescale
# the outputs, then show the per-dimension scores and offer them as CSV.
if st.button("Predict"):
    if not uploaded_file:
        st.write("Please upload a PDF file to get scores.")
    elif not text_content.strip():
        # e.g. a scanned/image-only PDF: there is nothing meaningful to score.
        st.error("No text could be extracted from the uploaded PDF, so there is nothing to score.")
    else:
        max_tokens = 512  # Standard BERT/RoBERTa max length

        # Tokenize the already extracted text_content, truncating to the limit.
        inputs = tokenizer(
            text_content,
            return_tensors="pt",
            truncation=True,
            max_length=max_tokens,
        )

        # Compare the *untruncated* length with the limit: an essay of exactly
        # max_tokens tokens fits and must not be reported as truncated.
        full_token_count = len(tokenizer(text_content, truncation=False)["input_ids"])
        if full_token_count > max_tokens:
            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")

        # Get predictions from the model (inference only, no gradients).
        with torch.no_grad():
            outputs = model(**inputs)

        # One essay was submitted, so take row 0 of the batch explicitly
        # instead of squeezing away every size-1 dimension.
        predicted_scores = outputs.logits[0].numpy()

        # Linear rescaling of the raw outputs, then rounding to the nearest 0.5.
        # NOTE(review): the 2.25 / -1.25 constants presumably come from the
        # model's published usage instructions — confirm against the model card.
        scaled_scores = 2.25 * predicted_scores - 1.25
        rounded_scores = [round(float(score) * 2) / 2 for score in scaled_scores]

        # Create results DataFrame
        labels = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
        df = pd.DataFrame({
            'Dimension': labels,
            'Score': rounded_scores,
        })

        # Display results in app
        st.write("Scores:")
        st.dataframe(df)

        # Save CSV locally; a failed write must not prevent the download
        # button below from being rendered.
        local_path = "essay_scores.csv"
        try:
            df.to_csv(local_path, index=False)
        except OSError as exc:
            st.warning(f"Could not save results locally to {local_path}: {exc}")
        else:
            st.success(f"Results saved locally to {local_path}")

        # Create download button for CSV
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download results as CSV",
            data=csv,
            file_name="essay_scores.csv",
            mime="text/csv",
        )