"""Encode the sentences of ``transcript-1.txt`` and save the embeddings.

The transcript is read from the parent directory of this script, split into
sentences with ``salience.salience.get_sentences``, encoded with the
``all-mpnet-base-v2`` entry of ``salience.salience.models``, and the resulting
array is written to ``genfiles/embeddings.npy`` next to this script.
"""
import sys
import os
import numpy as np

# Add the parent directory to the path so we can import salience
script_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(script_dir)
sys.path.insert(0, parent_dir)

from salience.salience import models, get_sentences

# Load the transcript.
# The encoding is explicit so the result does not depend on the platform's
# locale default (assumes the transcript is UTF-8 -- TODO confirm).
transcript_path = os.path.join(parent_dir, 'transcript-1.txt')
with open(transcript_path, 'r', encoding='utf-8') as f:
    source_text = f.read()

# Get sentences and encode them
print("Loading transcript and encoding sentences...")
# sentence_ranges is returned alongside the sentences but is not used here.
sentences, sentence_ranges = get_sentences(source_text)
print(f"Number of sentences: {len(sentences)}")

# Use the default model for comparison
model_name = 'all-mpnet-base-v2'
model = models[model_name]
vectors = model.encode(sentences)
print(f"Vector shape: {vectors.shape}")

# Save the embeddings to genfiles directory (created on first run)
genfiles_dir = os.path.join(script_dir, 'genfiles')
os.makedirs(genfiles_dir, exist_ok=True)
output_path = os.path.join(genfiles_dir, 'embeddings.npy')
np.save(output_path, vectors)

print(f"\nEmbeddings saved to: {output_path}")
# Bytes -> MiB for a human-readable size report.
print(f"File size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")