feat: text editor and blog post

nobody 2025-11-01 12:08:03 -07:00
commit 78297efe5c
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
17 changed files with 2008 additions and 24 deletions

View file

@@ -1,25 +1,53 @@
from flask import Flask, request
from flask_cors import CORS
import numpy as np
from .salience import extract, AVAILABLE_MODELS
import json

app = Flask(__name__)
CORS(app, origins=["http://localhost:5173"])

# Load default text from transcript.txt for GET requests
with open('./transcript.txt', 'r') as file:
    source_text = file.read().strip()
    default_source_text = file.read().strip()

@app.route("/models")
def models_view():
    return json.dumps(list(AVAILABLE_MODELS.keys()))

@app.route("/salience")
def salience_view():
@app.route("/salience", methods=['GET'])
def salience_view_default():
    """GET endpoint - processes default text from transcript.txt"""
    model_name = request.args.get('model', 'all-mpnet-base-v2')

    # Validate model name
    if model_name not in AVAILABLE_MODELS:
        return json.dumps({'error': f'Invalid model: {model_name}'}), 400

    sentence_ranges, adjacency = extract(default_source_text, model_name)
    return json.dumps({
        'source': default_source_text,
        'intervals': sentence_ranges,
        'adjacency': np.nan_to_num(adjacency.numpy()).tolist(),
        'model': model_name,
    })

@app.route("/salience", methods=['POST'])
def salience_view_custom():
    """POST endpoint - processes text from request body"""
    model_name = request.args.get('model', 'all-mpnet-base-v2')

    # Validate model name
    if model_name not in AVAILABLE_MODELS:
        return json.dumps({'error': f'Invalid model: {model_name}'}), 400

    # Get document content from request body as plain text
    source_text = request.data.decode('utf-8').strip()
    if not source_text:
        return json.dumps({'error': 'No text provided'}), 400

    sentence_ranges, adjacency = extract(source_text, model_name)
    return json.dumps({
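
For reference, a minimal client sketch of these endpoints, assuming the backend runs on localhost:5000 (the Flask default port, not shown in this diff) and that the requests package is installed; the sample text and model name are placeholders:

import requests

BASE = "http://localhost:5000"  # assumed Flask default port

# List the available embedding models.
print(requests.get(f"{BASE}/models").json())

# GET /salience scores the default text loaded from transcript.txt.
default_result = requests.get(
    f"{BASE}/salience",
    params={"model": "all-mpnet-base-v2"},
).json()
print(len(default_result["intervals"]), "sentences in the default text")

# POST /salience scores text sent as the plain-text request body; the model
# is still selected via the query string. The POST response body is truncated
# in the diff above, so only its keys are inspected here.
custom_result = requests.post(
    f"{BASE}/salience",
    params={"model": "all-mpnet-base-v2"},
    data="First sentence. Second sentence. Third sentence.".encode("utf-8"),
).json()
print(sorted(custom_result.keys()))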

View file

@@ -83,16 +83,31 @@ def text_rank(sentences, model_name='all-mpnet-base-v2'):
    adjacency[adjacency < 0] = 0
    return normalized_adjacency(adjacency)

def extract(source_text, model_name='all-mpnet-base-v2'):
    """
    Main API function that extracts sentence positions and computes normalized adjacency matrix.

    Returns:
        sentence_ranges: List of (start, end) tuples for each sentence's character position
        adjacency: (N × N) normalized adjacency matrix where N is the number of sentences.
            Each entry (i,j) represents the normalized similarity between sentences i and j.
            This matrix is returned to the frontend, which raises it to a power and computes
            the final salience scores via random walk simulation.
    """
    sentences, sentence_ranges = get_sentences(source_text)
    adjacency = text_rank(sentences, model_name)
    return sentence_ranges, adjacency

# =============================================================================
# Unused/Debugging Code
# =============================================================================

def terminal_distr(adjacency, initial=None):
    sample = initial if initial is not None else torch.full((adjacency.shape[0],), 1.)
    scores = sample.matmul(torch.matrix_power(adjacency, 10)).numpy().tolist()
    return scores

def extract(source_text, model_name='all-mpnet-base-v2'):
    sentences, sentence_ranges = get_sentences(source_text)
    adjacency = text_rank(sentences, model_name)
    return sentence_ranges, adjacency

def get_results(sentences, adjacency):
    scores = terminal_distr(adjacency)
    for score, sentence in sorted(zip(scores, sentences), key=lambda xs: xs[0]):
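
The extract docstring says the frontend raises the normalized adjacency matrix to a power and derives salience scores via a random walk, which is also what terminal_distr does server-side with torch.matrix_power. A small self-contained NumPy sketch of that idea follows; the 3×3 matrix is made up for illustration, its rows are assumed to sum to 1 (whether normalized_adjacency normalizes rows or columns is not shown in this diff), and the power of 10 mirrors terminal_distr:

import numpy as np

# Illustrative 3-sentence adjacency matrix; rows are assumed to already be
# normalized so each sums to 1.
adjacency = np.array([
    [0.0, 0.7, 0.3],
    [0.5, 0.0, 0.5],
    [0.2, 0.8, 0.0],
])

# Start with uniform mass on every sentence, take 10 random-walk steps by
# raising the transition matrix to the 10th power, and read off the mass
# that accumulates on each sentence as its salience score.
initial = np.full(adjacency.shape[0], 1.0)
scores = initial @ np.linalg.matrix_power(adjacency, 10)

print(scores)                    # one salience score per sentence
print(np.argsort(scores)[::-1])  # sentence indices, most salient first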