SURIYA-KP committed
Commit 17bf656 · verified · 1 Parent(s): d4957da

Add example usage script

Files changed (1)
  1. example_usage.py +40 -0
example_usage.py ADDED
@@ -0,0 +1,40 @@
+
+import torch
+from transformers import AutoModel, AutoTokenizer
+
+# Load model and tokenizer (assumption: trust_remote_code is needed for the custom nomic-embed architecture)
+model_name = "SURIYA-KP/nomic-embed-text-v2-moe-fine-tuned-depression-symptoms"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+
+# Function to get an embedding via masked mean pooling
+def get_embedding(text):
+    # Tokenize
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
+
+    # Get model output without tracking gradients
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    # Mean pooling: average the token embeddings, ignoring padding positions
+    token_embeddings = outputs.last_hidden_state
+    attention_mask = inputs['attention_mask']
+    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    embedding = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+    return embedding.numpy()[0]
+
+# Example usage
+text1 = "I feel worthless and useless."
+text2 = "I am feeling happy and content today."
+
+emb1 = get_embedding(text1)
+emb2 = get_embedding(text2)
+
+# Calculate cosine similarity between the two embeddings
+cos_sim = torch.nn.functional.cosine_similarity(
+    torch.tensor(emb1).unsqueeze(0),
+    torch.tensor(emb2).unsqueeze(0)
+).item()
+
+print(f"Cosine similarity between texts: {cos_sim:.4f}")