feat: text editor and blog post

nobody 2025-11-01 12:08:03 -07:00
commit 78297efe5c
Signed by: GrocerPublishAgent
GPG key ID: D460CD54A9E3AB86
17 changed files with 2008 additions and 24 deletions

View file

@@ -1,25 +1,53 @@
from flask import Flask, request
from flask_cors import CORS
import numpy as np
from .salience import extract, AVAILABLE_MODELS
import json

app = Flask(__name__)
CORS(app, origins=["http://localhost:5173"])

# Load default text from transcript.txt for GET requests
with open('./transcript.txt', 'r') as file:
    source_text = file.read().strip()
    default_source_text = file.read().strip()

@app.route("/models")
def models_view():
    return json.dumps(list(AVAILABLE_MODELS.keys()))

@app.route("/salience")
def salience_view():
@app.route("/salience", methods=['GET'])
def salience_view_default():
    """GET endpoint - processes default text from transcript.txt"""
    model_name = request.args.get('model', 'all-mpnet-base-v2')

    # Validate model name
    if model_name not in AVAILABLE_MODELS:
        return json.dumps({'error': f'Invalid model: {model_name}'}), 400

    sentence_ranges, adjacency = extract(default_source_text, model_name)
    return json.dumps({
        'source': default_source_text,
        'intervals': sentence_ranges,
        'adjacency': np.nan_to_num(adjacency.numpy()).tolist(),
        'model': model_name,
    })

@app.route("/salience", methods=['POST'])
def salience_view_custom():
    """POST endpoint - processes text from request body"""
    model_name = request.args.get('model', 'all-mpnet-base-v2')

    # Validate model name
    if model_name not in AVAILABLE_MODELS:
        return json.dumps({'error': f'Invalid model: {model_name}'}), 400

    # Get document content from request body as plain text
    source_text = request.data.decode('utf-8').strip()
    if not source_text:
        return json.dumps({'error': 'No text provided'}), 400

    sentence_ranges, adjacency = extract(source_text, model_name)
    return json.dumps({
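
For reference, a minimal client sketch of these endpoints, assuming the backend runs on localhost:5000 (the Flask default port, not shown in this diff) and that the requests package is installed; the sample text and model name are placeholders:

import requests

BASE = "http://localhost:5000"  # assumed Flask default port

# List the available embedding models.
print(requests.get(f"{BASE}/models").json())

# GET /salience scores the default text loaded from transcript.txt.
default_result = requests.get(
    f"{BASE}/salience",
    params={"model": "all-mpnet-base-v2"},
).json()
print(len(default_result["intervals"]), "sentences in the default text")

# POST /salience scores text sent as the plain-text request body; the model
# is still selected via the query string. The POST response body is truncated
# in the diff above, so only its keys are inspected here.
custom_result = requests.post(
    f"{BASE}/salience",
    params={"model": "all-mpnet-base-v2"},
    data="First sentence. Second sentence. Third sentence.".encode("utf-8"),
).json()
print(sorted(custom_result.keys()))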

View file

@@ -83,16 +83,31 @@ def text_rank(sentences, model_name='all-mpnet-base-v2'):
    adjacency[adjacency < 0] = 0
    return normalized_adjacency(adjacency)

def extract(source_text, model_name='all-mpnet-base-v2'):
    """
    Main API function that extracts sentence positions and computes normalized adjacency matrix.

    Returns:
        sentence_ranges: List of (start, end) tuples for each sentence's character position
        adjacency: (N × N) normalized adjacency matrix where N is the number of sentences.
            Each entry (i,j) represents the normalized similarity between sentences i and j.
            This matrix is returned to the frontend, which raises it to a power and computes
            the final salience scores via random walk simulation.
    """
    sentences, sentence_ranges = get_sentences(source_text)
    adjacency = text_rank(sentences, model_name)
    return sentence_ranges, adjacency

# =============================================================================
# Unused/Debugging Code
# =============================================================================

def terminal_distr(adjacency, initial=None):
    sample = initial if initial is not None else torch.full((adjacency.shape[0],), 1.)
    scores = sample.matmul(torch.matrix_power(adjacency, 10)).numpy().tolist()
    return scores

def extract(source_text, model_name='all-mpnet-base-v2'):
    sentences, sentence_ranges = get_sentences(source_text)
    adjacency = text_rank(sentences, model_name)
    return sentence_ranges, adjacency

def get_results(sentences, adjacency):
    scores = terminal_distr(adjacency)
    for score, sentence in sorted(zip(scores, sentences), key=lambda xs: xs[0]):
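
The extract docstring says the frontend raises the normalized adjacency matrix to a power and derives salience scores via a random walk, which is also what terminal_distr does server-side with torch.matrix_power. A small self-contained NumPy sketch of that idea follows; the 3×3 matrix is made up for illustration, its rows are assumed to sum to 1 (whether normalized_adjacency normalizes rows or columns is not shown in this diff), and the power of 10 mirrors terminal_distr:

import numpy as np

# Illustrative 3-sentence adjacency matrix; rows are assumed to already be
# normalized so each sums to 1.
adjacency = np.array([
    [0.0, 0.7, 0.3],
    [0.5, 0.0, 0.5],
    [0.2, 0.8, 0.0],
])

# Start with uniform mass on every sentence, take 10 random-walk steps by
# raising the transition matrix to the 10th power, and read off the mass
# that accumulates on each sentence as its salience score.
initial = np.full(adjacency.shape[0], 1.0)
scores = initial @ np.linalg.matrix_power(adjacency, 10)

print(scores)                    # one salience score per sentence
print(np.argsort(scores)[::-1])  # sentence indices, most salient first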