feat: create deployment scripts

2025-11-02 13:09:23 -08:00 · 2025-11-02 13:09:23 -08:00 · 8d5bce4bfb
commit 8d5bce4bfb
parent 78297efe5c
22 changed files with 2697 additions and 74 deletions
--- a/api/benchmarks/01-generate_embeddings.py
+++ b/api/benchmarks/01-generate_embeddings.py
@ -0,0 +1,34 @@
+import sys
+import os
+import numpy as np
+
+# Add the parent directory (api/) to the path so we can import salience
+script_dir = os.path.dirname(os.path.abspath(__file__))
+parent_dir = os.path.dirname(script_dir)
+sys.path.insert(0, parent_dir)
+
+from salience.salience import models, get_sentences
+
+# Load the transcript as UTF-8 (assumed) rather than the locale-dependent default
+transcript_path = os.path.join(parent_dir, 'transcript-1.txt')
+with open(transcript_path, 'r', encoding='utf-8') as f:
+    source_text = f.read()
+
+# Split the transcript into sentences (they are encoded further below)
+print("Loading transcript and encoding sentences...")
+sentences, sentence_ranges = get_sentences(source_text)
+print(f"Number of sentences: {len(sentences)}")
+
+# Encode every sentence with the default model for comparison
+model_name = 'all-mpnet-base-v2'
+model = models[model_name]
+vectors = model.encode(sentences)
+print(f"Vector shape: {vectors.shape}")
+
+# Save the embeddings to genfiles/embeddings.npy next to this script
+genfiles_dir = os.path.join(script_dir, 'genfiles')
+os.makedirs(genfiles_dir, exist_ok=True)
+output_path = os.path.join(genfiles_dir, 'embeddings.npy')
+np.save(output_path, vectors)
+print(f"\nEmbeddings saved to: {output_path}")
+print(f"File size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")