Pulastya0 committed on
Commit
9ff4938
·
1 Parent(s): 6b4ca68

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +17 -0
  2. encoder.py +15 -0
  3. main.py +172 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: the official Python 3.10 runtime.
FROM python:3.10

# Every relative path below resolves against /code inside the container.
WORKDIR /code

# Copy only the dependency manifest first, so the install layer below can be
# reused from cache while requirements.txt is unchanged.
COPY requirements.txt ./

# Install (or upgrade to) the listed packages without keeping pip's cache.
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Bring in the rest of the project sources.
COPY . ./

# Serve main:app on port 7860 with Gunicorn supervising four Uvicorn workers.
CMD ["gunicorn", "main:app", "--workers", "4", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860"]
encoder.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+
3
class SentenceEncoder:
    """Wrapper around a SentenceTransformer model that tolerates load failure.

    When the model cannot be loaded the error is printed and ``self.model`` is
    set to ``None``; ``encode`` then returns ``None`` instead of raising.
    """

    def __init__(self, model_name='l3cube-pune/indic-sentence-similarity-sbert'):
        """Try to load ``model_name``; store ``None`` if anything goes wrong."""
        try:
            loaded = SentenceTransformer(model_name)
            print(f"βœ… Model '{model_name}' loaded successfully.")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            loaded = None
        self.model = loaded

    def encode(self, texts, batch_size=32, show_progress_bar=False):
        """Encode ``texts`` with the wrapped model, requesting tensor output.

        Returns ``None`` when no model is loaded; otherwise returns whatever
        the model's ``encode`` returns for ``convert_to_tensor=True``.
        """
        model = self.model
        if model is not None:
            return model.encode(
                texts,
                batch_size=batch_size,
                show_progress_bar=show_progress_bar,
                convert_to_tensor=True,
            )
        return None
main.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
from typing import List, Optional

import chromadb
import firebase_admin
from fastapi import FastAPI, HTTPException, Depends
from firebase_admin import credentials, firestore
from pydantic import BaseModel, Field

from encoder import SentenceEncoder
11
+
12
+ # --- Pydantic Models ---
13
class UserProfile(BaseModel):
    # Request body for the profile-based recommendation endpoint.

    # Identifier of the user the profile belongs to.
    user_id: str
    # Required list of the user's skills.
    skills: List[str] = Field(default=..., example=["python", "data analysis"])
    # Required list of the user's interests.
    interests: List[str] = Field(default=..., example=["machine learning", "web development"])
17
+
18
class SearchQuery(BaseModel):
    # Request body for the free-text search endpoint.

    # Required search text that gets embedded and matched against internships.
    query: str = Field(default=..., example="marketing internship in mumbai")
20
+
21
+ # --- SCHEMA CHANGED HERE ---
22
+ # Reverted to use 'id' and 'skills'
23
class InternshipData(BaseModel):
    """Payload describing one internship to be stored and indexed.

    ``id`` is used as the Firestore document ID and as the ChromaDB record ID.
    ``stipend`` is optional and may be omitted or sent as ``null``.
    """

    id: str = Field(..., example="int_021")
    title: str
    description: str
    skills: List[str]
    duration: int
    createdAt: str
    # Declared Optional so that an explicit null validates; a bare
    # ``int = None`` annotation rejects null under Pydantic v2.
    stipend: Optional[int] = None
31
+
32
class RecommendationResponse(BaseModel):
    # Response body shared by the recommendation and search endpoints.

    # One dict per matched internship.
    recommendations: List[dict]
34
+
35
class StatusResponse(BaseModel):
    # Response body returned after an internship has been added.

    # Outcome label for the operation.
    status: str
    # ID of the internship the operation applied to.
    internship_id: str
38
+
39
+ # --- FastAPI App & Firebase Initialization ---
40
app = FastAPI(
    title="Internship Recommendation API",
    description="An API using Firestore for metadata, and ChromaDB for vector search.",
    version="2.1.0",
)

# Connect to Firebase a single time, when this module is imported.
# Credentials come from the FIREBASE_CREDS_JSON environment variable when it
# is set (parsed as JSON), otherwise from a local serviceAccountKey.json file.
# Any failure is reported and leaves `db` as None so the app can still start.
try:
    if 'FIREBASE_CREDS_JSON' not in os.environ:
        cred = credentials.Certificate('serviceAccountKey.json')
    else:
        creds_dict = json.loads(os.environ['FIREBASE_CREDS_JSON'])
        cred = credentials.Certificate(creds_dict)

    firebase_admin.initialize_app(cred)
    db = firestore.client()
    print("βœ… Firebase connection initialized.")
except Exception as e:
    print(f"❌ Could not initialize Firebase. Error: {e}")
    db = None
60
+
61
+ # Dependency to provide the db client
62
def get_db():
    """Return the module-level Firestore client for use as a FastAPI dependency.

    Raises:
        HTTPException: 503 when the Firestore client was never initialized.
    """
    if db is not None:
        return db
    raise HTTPException(status_code=503, detail="Firestore connection not available.")
66
+
67
+ # --- Global Variables for Model and ChromaDB ---
68
# Populated by load_model_and_data() at application startup; endpoints treat
# None as "server not ready".
encoder = None
chroma_collection = None


@app.on_event("startup")
def load_model_and_data():
    """Load the sentence encoder and open the ChromaDB collection at startup.

    The on-disk location of the ChromaDB store defaults to
    ``/content/chroma_db`` and can be overridden with the ``CHROMA_DB_PATH``
    environment variable.
    """
    global encoder, chroma_collection

    print("πŸš€ Loading sentence encoder model...")
    encoder = SentenceEncoder()

    chroma_path = os.environ.get("CHROMA_DB_PATH", "/content/chroma_db")
    client = chromadb.PersistentClient(path=chroma_path)

    # The endpoints report `1 - distance` as a similarity score, which is only
    # meaningful for cosine distance, so request cosine space explicitly
    # instead of relying on the collection default.
    # NOTE(review): a collection that already exists on disk presumably keeps
    # the space it was created with - confirm, and re-create it if needed.
    chroma_collection = client.get_or_create_collection(
        name="internships",
        metadata={"hnsw:space": "cosine"},
    )

    print("βœ… ChromaDB client initialized and collection is ready.")
    print(f" - Internships in DB: {chroma_collection.count()}")
83
+
84
+ # --- API Endpoints ---
85
@app.get("/")
def read_root():
    # Landing route: responds with a fixed welcome payload.
    welcome = {"message": "Welcome to the Internship Recommendation API!"}
    return welcome
88
+
89
@app.post("/add-internship", response_model=StatusResponse)
def add_internship(internship: InternshipData, db_client: firestore.Client = Depends(get_db)):
    """Store a new internship in Firestore and index its embedding in ChromaDB.

    Args:
        internship: The internship payload; ``internship.id`` is used as both
            the Firestore document ID and the ChromaDB record ID.
        db_client: Firestore client supplied by the ``get_db`` dependency.

    Returns:
        A dict with ``status`` set to ``"success"`` and the ``internship_id``.

    Raises:
        HTTPException: 503 when the encoder or collection is unavailable,
            400 when a Firestore document with the same ID already exists.
    """
    if chroma_collection is None or encoder is None:
        raise HTTPException(status_code=503, detail="Server is not ready.")

    doc_ref = db_client.collection('internships').document(internship.id)
    if doc_ref.get().exists:
        raise HTTPException(status_code=400, detail="Internship ID already exists.")

    # Embed before writing anything, so an encoder failure cannot leave a
    # Firestore document behind that has no vector entry.
    text_to_encode = f"{internship.title}. {internship.description}. Skills: {', '.join(internship.skills)}"
    encoded = encoder.encode([text_to_encode])
    if encoded is None:
        # encode() returns None when the underlying model failed to load.
        raise HTTPException(status_code=503, detail="Server is not ready.")
    embedding = encoded[0].tolist()

    record = internship.dict()

    # ChromaDB metadata is kept to scalar values: unset fields (e.g. a missing
    # stipend) are dropped and the skills list is stored as a JSON string.
    metadata_for_chroma = {key: value for key, value in record.items() if value is not None}
    metadata_for_chroma['skills'] = json.dumps(record['skills'])

    # Save to Firestore
    doc_ref.set(record)

    try:
        chroma_collection.add(
            ids=[internship.id],
            embeddings=[embedding],
            metadatas=[metadata_for_chroma],
        )
    except Exception:
        # Undo the Firestore write so the two stores stay in step and a retry
        # is not rejected with "ID already exists"; then surface the error.
        doc_ref.delete()
        raise

    print(f"βœ… Added internship to Firestore and ChromaDB: {internship.id}")
    return {"status": "success", "internship_id": internship.id}
124
+
125
@app.post("/profile-recommendations", response_model=RecommendationResponse)
def get_profile_recommendations(profile: UserProfile):
    """Recommend up to three internships for a user's skills and interests.

    The profile is flattened into one text, embedded, and used to query the
    ChromaDB collection. Each result carries the internship ID and a score
    computed as ``1 - distance`` from the distance ChromaDB returns.

    Raises:
        HTTPException: 503 when the encoder or collection is unavailable.
    """
    if chroma_collection is None or encoder is None:
        raise HTTPException(status_code=503, detail="Server is not ready.")

    query_text = f"Skills: {', '.join(profile.skills)}. Interests: {', '.join(profile.interests)}"
    encoded = encoder.encode([query_text])
    if encoded is None:
        # encode() returns None when the underlying model failed to load.
        raise HTTPException(status_code=503, detail="Server is not ready.")
    query_embedding = encoded[0].tolist()

    results = chroma_collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
    )

    # Results are nested one list per query; only one query was sent.
    ids = results.get('ids', [[]])[0]
    distances = results.get('distances', [[]])[0]

    recommendations = [
        {"internship_id": internship_id, "score": 1 - distance}
        for internship_id, distance in zip(ids, distances)
    ]
    return {"recommendations": recommendations}
149
+
150
@app.post("/search", response_model=RecommendationResponse)
def search_internships(search: SearchQuery):
    """Return up to three internships matching a free-text query.

    The query text is embedded and used to query the ChromaDB collection.
    Each result carries the internship ID and a score computed as
    ``1 - distance`` from the distance ChromaDB returns.

    Raises:
        HTTPException: 503 when the encoder or collection is unavailable.
    """
    if chroma_collection is None or encoder is None:
        raise HTTPException(status_code=503, detail="Server is not ready.")

    encoded = encoder.encode([search.query])
    if encoded is None:
        # encode() returns None when the underlying model failed to load.
        raise HTTPException(status_code=503, detail="Server is not ready.")
    query_embedding = encoded[0].tolist()

    results = chroma_collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
    )

    # Results are nested one list per query; only one query was sent.
    ids = results.get('ids', [[]])[0]
    distances = results.get('distances', [[]])[0]

    recommendations = [
        {"internship_id": internship_id, "score": 1 - distance}
        for internship_id, distance in zip(ids, distances)
    ]
    return {"recommendations": recommendations}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn[standard]
# Process manager invoked by the Dockerfile CMD.
gunicorn
pydantic
sentence-transformers
torch
numpy
scikit-learn
firebase-admin
# Vector store imported by main.py.
chromadb