feat: text editor and blog post

This commit is contained in:
nobody 2025-11-01 12:08:03 -07:00
commit 78297efe5c
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
17 changed files with 2008 additions and 24 deletions

View file

@@ -10,6 +10,7 @@ readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"flask>=2.3.2,<3.0.0",
"flask-cors>=4.0.0,<5.0.0",
"transformers>=4.30.2,<5.0.0",
"nltk>=3.8.1,<4.0.0",
"sentence-transformers>=2.2.2,<3.0.0",

View file

@@ -1,25 +1,53 @@
# Flask application setup for the salience API.
from flask import Flask, request
from flask_cors import CORS
import numpy as np
from .salience import extract, AVAILABLE_MODELS
import json

app = Flask(__name__)
# Allow the local Vite dev server (default port 5173) to call this API.
CORS(app, origins=["http://localhost:5173"])

# Load default text from transcript.txt for GET requests
with open('./transcript.txt', 'r') as file:
    source_text = file.read().strip()
    # NOTE(review): the second read() below returns '' because the first
    # read() above already consumed the file handle. The `source_text` line
    # looks like pre-rename diff residue of `default_source_text` — confirm
    # and delete one of the two assignments.
    default_source_text = file.read().strip()
@app.route("/models")
def models_view():
    """Return the names of all available embedding models as a JSON array."""
    model_names = [name for name in AVAILABLE_MODELS]
    return json.dumps(model_names)
@app.route("/salience")
def salience_view():
@app.route("/salience", methods=['GET'])
def salience_view_default():
    """GET endpoint - processes default text from transcript.txt

    Query parameters:
        model: name of the sentence-embedding model to use; defaults to
            'all-mpnet-base-v2'. Must be a key of AVAILABLE_MODELS,
            otherwise a 400 error response is returned.

    Returns a JSON object with the source text, per-sentence character
    intervals, the NaN-sanitized adjacency matrix, and the model name.
    """
    requested_model = request.args.get('model', 'all-mpnet-base-v2')

    # Reject unknown model names up front with a 400.
    if requested_model not in AVAILABLE_MODELS:
        return json.dumps({'error': f'Invalid model: {requested_model}'}), 400

    intervals, adjacency_matrix = extract(default_source_text, requested_model)

    # nan_to_num zeroes out any NaN entries so the matrix serializes cleanly.
    payload = {
        'source': default_source_text,
        'intervals': intervals,
        'adjacency': np.nan_to_num(adjacency_matrix.numpy()).tolist(),
        'model': requested_model,
    }
    return json.dumps(payload)
@app.route("/salience", methods=['POST'])
def salience_view_custom():
"""POST endpoint - processes text from request body"""
model_name = request.args.get('model', 'all-mpnet-base-v2')
# Validate model name
if model_name not in AVAILABLE_MODELS:
return json.dumps({'error': f'Invalid model: {model_name}'}), 400
# Get document content from request body as plain text
source_text = request.data.decode('utf-8').strip()
if not source_text:
return json.dumps({'error': 'No text provided'}), 400
sentence_ranges, adjacency = extract(source_text, model_name)
return json.dumps({

View file

@@ -83,16 +83,31 @@ def text_rank(sentences, model_name='all-mpnet-base-v2'):
adjacency[adjacency < 0] = 0
return normalized_adjacency(adjacency)
def extract(source_text, model_name='all-mpnet-base-v2'):
    """
    Main API function that extracts sentence positions and computes the
    normalized adjacency matrix.

    Returns:
        sentence_ranges: List of (start, end) tuples giving each sentence's
            character position within `source_text`.
        adjacency: (N × N) normalized adjacency matrix, N = number of
            sentences; entry (i, j) is the normalized similarity between
            sentences i and j. The frontend raises this matrix to a power
            and derives the final salience scores via random-walk simulation.
    """
    split_sentences, char_ranges = get_sentences(source_text)
    return char_ranges, text_rank(split_sentences, model_name)
# =============================================================================
# Unused/Debugging Code
# =============================================================================
def terminal_distr(adjacency, initial=None):
    """Approximate the terminal distribution of a 10-step random walk.

    Args:
        adjacency: square (N × N) torch tensor of transition weights.
        initial: optional length-N torch tensor used as the starting
            distribution; defaults to an all-ones vector.

    Returns:
        A plain Python list of N floats: `initial @ adjacency**10`.
    """
    if initial is None:
        start = torch.full((adjacency.shape[0],), 1.)
    else:
        start = initial
    walked = torch.matrix_power(adjacency, 10)
    return start.matmul(walked).numpy().tolist()
# NOTE(review): this `extract` duplicates the documented version defined
# earlier in the file and, being defined later, would shadow it at import
# time. It looks like pre-diff residue from the rendered commit page —
# confirm against the repository and remove the duplicate.
def extract(source_text, model_name='all-mpnet-base-v2'):
    # Split the text into sentences and their (start, end) character ranges.
    sentences, sentence_ranges = get_sentences(source_text)
    # Build the normalized sentence-similarity adjacency matrix.
    adjacency = text_rank(sentences, model_name)
    return sentence_ranges, adjacency
def get_results(sentences, adjacency):
scores = terminal_distr(adjacency)
for score, sentence in sorted(zip(scores, sentences), key=lambda xs: xs[0]):

14
api/uv.lock generated
View file

@@ -142,6 +142,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fd/56/26f0be8adc2b4257df20c1c4260ddd0aa396cf8e75d90ab2f7ff99bc34f9/flask-2.3.3-py3-none-any.whl", hash = "sha256:f69fcd559dc907ed196ab9df0e48471709175e696d6e698dd4dbe940f96ce66b", size = 96112 },
]
[[package]]
name = "flask-cors"
version = "4.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "flask" },
]
sdist = { url = "https://files.pythonhosted.org/packages/1c/41/89ea5af8b9d647036237c528abb2fdf8bb10b23b3f750e8e2da07873b270/flask_cors-4.0.2.tar.gz", hash = "sha256:493b98e2d1e2f1a4720a7af25693ef2fe32fbafec09a2f72c59f3e475eda61d2", size = 30954 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/60/e941089faf4f50f2e0231d7f7af69308616a37e99da3ec75df60b8809db7/Flask_Cors-4.0.2-py2.py3-none-any.whl", hash = "sha256:38364faf1a7a5d0a55bd1d2e2f83ee9e359039182f5e6a029557e1f56d92c09a", size = 14467 },
]
[[package]]
name = "fsspec"
version = "2025.10.0"
@@ -789,6 +801,7 @@ version = "0.0.0"
source = { editable = "." }
dependencies = [
{ name = "flask" },
{ name = "flask-cors" },
{ name = "nltk" },
{ name = "numpy" },
{ name = "sentence-transformers" },
@@ -798,6 +811,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "flask", specifier = ">=2.3.2,<3.0.0" },
{ name = "flask-cors", specifier = ">=4.0.0,<5.0.0" },
{ name = "nltk", specifier = ">=3.8.1,<4.0.0" },
{ name = "numpy", specifier = ">=1.25.0,<2.0.0" },
{ name = "sentence-transformers", specifier = ">=2.2.2,<3.0.0" },