salience-editor/api/benchmarks/01-generate_embeddings.py

import sys
import os
import numpy as np

# Benchmark step 1: encode every sentence of the transcript with the
# reference model and store the resulting vectors as
# genfiles/embeddings.npy next to this script.

# Add the parent directory to the path so we can import salience.
# This has to run before the `salience` import below, which is why that
# import is not grouped with the ones at the top of the file.
script_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(script_dir)
sys.path.insert(0, parent_dir)

from salience.salience import models, get_sentences

# Load the transcript.
# The encoding is given explicitly so the text does not depend on the
# platform's locale default (e.g. cp1252 on Windows).
# NOTE(review): assumes transcript-1.txt is UTF-8 encoded — confirm.
transcript_path = os.path.join(parent_dir, 'transcript-1.txt')
with open(transcript_path, 'r', encoding='utf-8') as f:
    source_text = f.read()

# Get sentences and encode them.
# get_sentences returns a pair; only the first element is used here
# (presumably the second holds each sentence's span in source_text —
# verify against salience.salience).
print("Loading transcript and encoding sentences...")
sentences, sentence_ranges = get_sentences(source_text)
print(f"Number of sentences: {len(sentences)}")

# Use the default model for comparison.
# `models` is indexed by model name; an unknown name raises here.
model_name = 'all-mpnet-base-v2'
model = models[model_name]
vectors = model.encode(sentences)
print(f"Vector shape: {vectors.shape}")

# Save the embeddings to the genfiles directory, creating it on first run.
genfiles_dir = os.path.join(script_dir, 'genfiles')
os.makedirs(genfiles_dir, exist_ok=True)
output_path = os.path.join(genfiles_dir, 'embeddings.npy')
np.save(output_path, vectors)
print(f"\nEmbeddings saved to: {output_path}")
# Size is computed with 1024-byte units (bytes / 1024 / 1024).
print(f"File size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")
feat: create deployment scripts 2025-11-02 13:09:23 -08:00			`import sys`
			`import os`
			`import numpy as np`

			`# Add the parent directory to the path so we can import salience`
			`script_dir = os.path.dirname(os.path.abspath(__file__))`
			`parent_dir = os.path.dirname(script_dir)`
			`sys.path.insert(0, parent_dir)`

			`from salience.salience import models, get_sentences`

			`# Load the transcript`
			`transcript_path = os.path.join(parent_dir, 'transcript-1.txt')`
			`with open(transcript_path, 'r') as f:`
			`source_text = f.read()`

			`# Get sentences and encode them`
			`print("Loading transcript and encoding sentences...")`
			`sentences, sentence_ranges = get_sentences(source_text)`
			`print(f"Number of sentences: {len(sentences)}")`

			`# Use the default model for comparison`
			`model_name = 'all-mpnet-base-v2'`
			`model = models[model_name]`
			`vectors = model.encode(sentences)`
			`print(f"Vector shape: {vectors.shape}")`

			`# Save the embeddings to genfiles directory`
			`genfiles_dir = os.path.join(script_dir, 'genfiles')`
			`os.makedirs(genfiles_dir, exist_ok=True)`
			`output_path = os.path.join(genfiles_dir, 'embeddings.npy')`
			`np.save(output_path, vectors)`
			`print(f"\nEmbeddings saved to: {output_path}")`
			`print(f"File size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")`