feat: create deployment scripts
This commit is contained in:
parent
78297efe5c
commit
8d5bce4bfb
22 changed files with 2697 additions and 74 deletions
34
api/benchmarks/01-generate_embeddings.py
Normal file
34
api/benchmarks/01-generate_embeddings.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
"""Encode the sentences of ``transcript-1.txt`` and save the embeddings.

The transcript is read from the parent directory of this script, split into
sentences with ``salience.get_sentences``, encoded with the
``all-mpnet-base-v2`` entry of ``salience.models``, and written to
``genfiles/embeddings.npy`` next to this script.
"""
import sys
import os
import numpy as np

# Add the parent directory to the path so we can import salience
script_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(script_dir)
sys.path.insert(0, parent_dir)

# Must come after the sys.path tweak above, otherwise the package is not found.
from salience.salience import models, get_sentences

# Load the transcript.
# Decode explicitly instead of relying on the platform's locale encoding, which
# differs between machines. NOTE(review): assumes the transcript is UTF-8 --
# confirm against how transcript-1.txt is produced.
transcript_path = os.path.join(parent_dir, 'transcript-1.txt')
with open(transcript_path, 'r', encoding='utf-8') as f:
    source_text = f.read()

# Get sentences and encode them
print("Loading transcript and encoding sentences...")
# sentence_ranges is returned alongside the sentences but is not used here.
sentences, sentence_ranges = get_sentences(source_text)
print(f"Number of sentences: {len(sentences)}")

# Use the default model for comparison
model_name = 'all-mpnet-base-v2'
model = models[model_name]
vectors = model.encode(sentences)
print(f"Vector shape: {vectors.shape}")

# Save the embeddings to genfiles directory (created on first run)
genfiles_dir = os.path.join(script_dir, 'genfiles')
os.makedirs(genfiles_dir, exist_ok=True)
output_path = os.path.join(genfiles_dir, 'embeddings.npy')
np.save(output_path, vectors)
print(f"\nEmbeddings saved to: {output_path}")
# Report the on-disk size in mebibytes (bytes / 1024 / 1024).
print(f"File size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")
|
||||
Loading…
Add table
Add a link
Reference in a new issue