Spaces:

devhem
/

AI-grading-system

Runtime error

App Files Files Community

AI-grading-system / app.py

devhem

Create app.py

cace677 verified 12 months ago

raw

history blame contribute delete

3.06 kB

	import streamlit as st
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	import numpy as np
	import fitz # PyMuPDF
	import pandas as pd
	import io

	# Load the model and tokenizer from Hugging Face
	model_name = "KevSun/Engessay_grading_ML"


	@st.cache_resource
	def load_model_and_tokenizer(name):
	    """Load the sequence-classification model and its tokenizer once.

	    Streamlit re-executes this script from the top on every widget
	    interaction. ``st.cache_resource`` keeps the loaded objects alive
	    across those reruns so the weights are not reloaded each time the
	    user uploads a file or presses a button.

	    Args:
	        name: Hugging Face model identifier passed to ``from_pretrained``.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    return (
	        AutoModelForSequenceClassification.from_pretrained(name),
	        AutoTokenizer.from_pretrained(name),
	    )


	model, tokenizer = load_model_and_tokenizer(model_name)

	# Streamlit app
	st.title("Automated Scoring App")
	st.write("Enter your English essay below to predict scores from multiple dimensions:")

	# The essay is supplied as a PDF upload rather than as typed text
	uploaded_file = st.file_uploader("Upload your PDF essay:", type=['pdf'])

	# Always defined, so later code can rely on the name existing even
	# when nothing has been uploaded yet
	text_content = ""

	if uploaded_file is not None:
	    # getvalue() returns the complete upload regardless of the current
	    # stream position, unlike read(), which only returns what is left
	    # after any earlier read of the same object
	    pdf_bytes = uploaded_file.getvalue()

	    # Open the PDF from memory and concatenate the text of every page;
	    # join() avoids rebuilding the string once per page
	    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
	        text_content = "".join(page.get_text() for page in doc)

	    # Display the extracted text (read-only) so the user can check it
	    st.write("Extracted text from PDF:")
	    st.text_area("PDF Content", text_content, height=200, disabled=True)

	# Score the uploaded essay when the user presses "Predict".
	# NOTE: st.button is only True on the rerun triggered by its own click,
	# so the results below disappear on the next interaction (including a
	# click on the download button).
	if st.button("Predict"):
	    if uploaded_file is None:
	        st.write("Please upload a PDF file to get scores.")
	    elif not text_content.strip():
	        # Nothing was extracted (e.g. an image-only PDF); scoring an
	        # empty string would produce meaningless numbers
	        st.warning("No text could be extracted from the uploaded PDF, so there is nothing to score.")
	    else:
	        # Tokenize the already extracted text, truncating long essays
	        inputs = tokenizer(
	            text_content,
	            return_tensors="pt",
	            truncation=True,
	            max_length=512,  # Standard BERT/RoBERTa max length
	        )

	        # Hitting the limit is treated as "was truncated"; a text of
	        # exactly 512 tokens triggers the warning as well
	        token_count = len(inputs['input_ids'][0])
	        if token_count == 512:
	            st.warning("⚠️ The text was too long and has been truncated to fit the model's maximum length. This might affect the accuracy of the predictions.")

	        # Inference only, so gradient tracking is switched off
	        with torch.no_grad():
	            outputs = model(**inputs)

	        # One essay was submitted, so take the single row of the batch
	        predicted_scores = outputs.logits[0].numpy()

	        # Scale the predictions, then round to the nearest 0.5
	        scaled_scores = 2.25 * predicted_scores - 1.25
	        rounded_scores = (np.round(scaled_scores * 2) / 2).tolist()

	        # Create results DataFrame: one row per scored dimension
	        labels = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
	        df = pd.DataFrame({
	            'Dimension': labels,
	            'Score': rounded_scores,
	        })

	        # Display results in app
	        st.write("Scores:")
	        st.dataframe(df)

	        # Save CSV locally; a failed write must not block the download
	        local_path = "essay_scores.csv"
	        try:
	            df.to_csv(local_path, index=False)
	        except OSError as exc:
	            st.warning(f"Could not save results to {local_path}: {exc}")
	        else:
	            st.success(f"Results saved locally to {local_path}")

	        # Create download button for CSV
	        csv = df.to_csv(index=False)
	        st.download_button(
	            label="Download results as CSV",
	            data=csv,
	            file_name="essay_scores.csv",
	            mime="text/csv",
	        )