feat: create deployment scripts
This commit is contained in:
parent
78297efe5c
commit
8d5bce4bfb
22 changed files with 2697 additions and 74 deletions
113
api/benchmarks/test_bench_cosine_sim.py
Normal file
113
api/benchmarks/test_bench_cosine_sim.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
"""
|
||||
Benchmark different cosine similarity implementations using pytest-benchmark.
|
||||
|
||||
First run: python generate_embeddings.py
|
||||
Then run: pytest test_bench_cosine_sim.py --benchmark-json=genfiles/benchmark_results.json
|
||||
To visualize: python visualize_benchmarks.py genfiles/benchmark_results.json
|
||||
"""
|
||||
import os
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
# Read the pre-generated embeddings once at import time; every test and
# benchmark below shares the same `vectors` array.
# The path is anchored to this file's directory, not the working directory.
script_dir = os.path.split(os.path.abspath(__file__))[0]
embeddings_path = os.path.join(script_dir, 'genfiles', 'embeddings.npy')
vectors = np.load(embeddings_path)
|
||||
|
||||
|
||||
# Original cos_sim function from salience.py
|
||||
def cos_sim_original(a, b):
    """Return pairwise cosine similarities between the rows of ``a`` and ``b``.

    The raw dot products ``a @ b.T`` are divided first by the norms of ``a``
    (taken along the last axis) and then by the norms of ``b``.

    Assumes ``a`` and ``b`` are 2-D arrays whose rows are vectors of equal
    length -- TODO confirm against callers other than the benchmarks here.
    """
    norms_a = np.linalg.norm(a, axis=-1)
    norms_b = np.linalg.norm(b, axis=-1)
    dots = a @ b.T

    # Transposing puts the ``a`` index on the last axis so the 1-D norms of
    # ``a`` broadcast against it; transposing back restores the layout.
    scaled_by_a = (dots.T / norms_a.T).T

    # The norms of ``b`` already line up with the last axis of the result.
    return scaled_by_a / norms_b
|
||||
|
||||
|
||||
# Nested for loop version
|
||||
def cos_sim_nested_loop(a, b):
    """Compute pairwise cosine similarities one (row, column) pair at a time.

    Returns a float array of shape ``(a.shape[0], b.shape[0])`` where entry
    ``[i, j]`` is ``dot(a[i], b[j]) / (norm(a[i]) * norm(b[j]))``.
    """
    row_count = a.shape[0]
    col_count = b.shape[0]
    result = np.zeros((row_count, col_count))

    for row in range(row_count):
        for col in range(col_count):
            left, right = a[row], b[col]
            numerator = np.dot(left, right)
            # Both norms are recomputed for every pair. This is the plain
            # loop baseline of the benchmark, so hoisting them would change
            # what is being measured.
            denominator = np.linalg.norm(left) * np.linalg.norm(right)
            result[row, col] = numerator / denominator

    return result
|
||||
|
||||
|
||||
# E*E^T with manual in-place normalization
|
||||
def cos_sim_inplace_norm(a, b):
    """Cosine similarity via ``a @ b.T`` with an element-wise in-place rescale.

    The dot-product matrix is computed with one matrix multiply, the row
    norms of ``a`` and ``b`` are computed once, and each entry ``[i, j]`` is
    then overwritten with ``sims[i, j] / (a_norms[i] * b_norms[j])``.

    Integer or boolean inputs are handled by promoting the product matrix to
    float64 before the rescale; floating inputs keep their dtype.

    Returns the rescaled matrix of shape ``(a.shape[0], b.shape[0])``.
    """
    # Compute raw dot products
    sims = a @ b.T

    # The quotients below are stored back into ``sims``. If the product is
    # not a floating array (e.g. integer inputs), those stores would silently
    # truncate values such as 0.707 to 0, so switch to a float buffer first.
    if not np.issubdtype(sims.dtype, np.inexact):
        sims = sims.astype(np.float64)

    # Compute norms once
    a_norms = np.linalg.norm(a, axis=-1)
    b_norms = np.linalg.norm(b, axis=-1)

    # Normalize in place. The explicit Python loops are the point of this
    # benchmark variant, so they are intentionally not vectorized.
    for i in range(sims.shape[0]):
        for j in range(sims.shape[1]):
            sims[i, j] = sims[i, j] / (a_norms[i] * b_norms[j])

    return sims
|
||||
|
||||
|
||||
# Broadcast division with in-place operations
|
||||
def cos_sim_broadcast_inplace(a, b):
    """Cosine similarity via ``a @ b.T`` rescaled with two in-place broadcasts.

    The dot-product matrix is divided in place by the column vector of
    ``a`` norms and then by the row vector of ``b`` norms, so no temporary
    matrix is allocated for the quotients.

    Integer or boolean inputs are handled by promoting the product matrix to
    float64 before the divisions; floating inputs keep their dtype.

    Returns the rescaled matrix of shape ``(a.shape[0], b.shape[0])``.
    """
    # Compute raw dot products
    sims = a @ b.T

    # ``/=`` must write float quotients back into ``sims``. NumPy refuses to
    # do that for an integer array (casting error), so promote it first.
    if not np.issubdtype(sims.dtype, np.inexact):
        sims = sims.astype(np.float64)

    # Compute norms once; keepdims keeps a trailing axis of length 1
    a_norms = np.linalg.norm(a, axis=-1, keepdims=True)  # shape (n, 1)
    b_norms = np.linalg.norm(b, axis=-1, keepdims=True)  # shape (m, 1)

    # Divide by a_norms (broadcasting across columns)
    sims /= a_norms
    # Divide by b_norms.T (broadcasting across rows)
    sims /= b_norms.T

    return sims
|
||||
|
||||
|
||||
# Verify all implementations produce the same results
|
||||
def test_correctness():
    """Check that every alternative matches the original within ``atol=1e-6``."""
    reference = cos_sim_original(vectors, vectors)

    # Evaluate all candidates up front (in the same order as before) so an
    # exception in any implementation surfaces before the comparisons run.
    candidates = [
        implementation(vectors, vectors)
        for implementation in (
            cos_sim_nested_loop,
            cos_sim_inplace_norm,
            cos_sim_broadcast_inplace,
        )
    ]

    for candidate in candidates:
        assert np.allclose(reference, candidate, atol=1e-6)
|
||||
|
||||
|
||||
# Benchmark tests
|
||||
def test_bench_original(benchmark):
    """Benchmark ``cos_sim_original`` on the shared embeddings."""
    count = vectors.shape[0]
    similarity = benchmark(cos_sim_original, vectors, vectors)
    # Sanity check only: the result must be a square (count x count) matrix.
    assert similarity.shape == (count, count)
|
||||
|
||||
|
||||
def test_bench_nested_loop(benchmark):
    """Benchmark ``cos_sim_nested_loop`` on the shared embeddings."""
    count = vectors.shape[0]
    similarity = benchmark(cos_sim_nested_loop, vectors, vectors)
    # Sanity check only: the result must be a square (count x count) matrix.
    assert similarity.shape == (count, count)
|
||||
|
||||
|
||||
def test_bench_inplace_norm(benchmark):
    """Benchmark ``cos_sim_inplace_norm`` on the shared embeddings."""
    count = vectors.shape[0]
    similarity = benchmark(cos_sim_inplace_norm, vectors, vectors)
    # Sanity check only: the result must be a square (count x count) matrix.
    assert similarity.shape == (count, count)
|
||||
|
||||
|
||||
def test_bench_broadcast_inplace(benchmark):
    """Benchmark ``cos_sim_broadcast_inplace`` on the shared embeddings."""
    count = vectors.shape[0]
    similarity = benchmark(cos_sim_broadcast_inplace, vectors, vectors)
    # Sanity check only: the result must be a square (count x count) matrix.
    assert similarity.shape == (count, count)
|
||||
Loading…
Add table
Add a link
Reference in a new issue