feat: text editor and blog post
parent 9e383ee26e
commit 78297efe5c

17 changed files with 2008 additions and 24 deletions
@@ -1,25 +1,53 @@
 from flask import Flask, request
 from flask_cors import CORS
 import numpy as np
 from .salience import extract, AVAILABLE_MODELS
 import json
 
 app = Flask(__name__)
 CORS(app, origins=["http://localhost:5173"])
 
+# Load default text from transcript.txt for GET requests
 with open('./transcript.txt', 'r') as file:
-    source_text = file.read().strip()
+    default_source_text = file.read().strip()
 
 @app.route("/models")
 def models_view():
     return json.dumps(list(AVAILABLE_MODELS.keys()))
 
-@app.route("/salience")
-def salience_view():
+@app.route("/salience", methods=['GET'])
+def salience_view_default():
+    """GET endpoint - processes default text from transcript.txt"""
+    model_name = request.args.get('model', 'all-mpnet-base-v2')
+
+    # Validate model name
+    if model_name not in AVAILABLE_MODELS:
+        return json.dumps({'error': f'Invalid model: {model_name}'}), 400
+
+    sentence_ranges, adjacency = extract(default_source_text, model_name)
+
+    return json.dumps({
+        'source': default_source_text,
+        'intervals': sentence_ranges,
+        'adjacency': np.nan_to_num(adjacency.numpy()).tolist(),
+        'model': model_name,
+    })
+
+@app.route("/salience", methods=['POST'])
+def salience_view_custom():
+    """POST endpoint - processes text from request body"""
+    model_name = request.args.get('model', 'all-mpnet-base-v2')
+
+    # Validate model name
+    if model_name not in AVAILABLE_MODELS:
+        return json.dumps({'error': f'Invalid model: {model_name}'}), 400
+
+    # Get document content from request body as plain text
+    source_text = request.data.decode('utf-8').strip()
+
+    if not source_text:
+        return json.dumps({'error': 'No text provided'}), 400
+
+    sentence_ranges, adjacency = extract(source_text, model_name)
+
+    return json.dumps({
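A quick sanity check for the two new endpoints is a small client script. This is a minimal sketch, not part of the commit: it assumes the Flask dev server is running at its default http://localhost:5000 and reuses the default model name from the handlers above.

import requests

BASE = "http://localhost:5000"  # assumed dev-server address; adjust to your setup

# List the embedding models the server will accept
print(requests.get(f"{BASE}/models").json())

# GET /salience: scores the bundled transcript.txt
data = requests.get(f"{BASE}/salience", params={"model": "all-mpnet-base-v2"}).json()
print(data["model"], "->", len(data["intervals"]), "sentences")

# POST /salience: scores caller-supplied plain text sent as the raw request body
resp = requests.post(
    f"{BASE}/salience",
    params={"model": "all-mpnet-base-v2"},
    data="First sentence. Second sentence. Third sentence.".encode("utf-8"),
)
print(resp.json()["intervals"])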
@@ -83,16 +83,31 @@ def text_rank(sentences, model_name='all-mpnet-base-v2'):
     adjacency[adjacency < 0] = 0
     return normalized_adjacency(adjacency)
 
+def extract(source_text, model_name='all-mpnet-base-v2'):
+    """
+    Main API function that extracts sentence positions and computes normalized adjacency matrix.
+
+    Returns:
+        sentence_ranges: List of (start, end) tuples for each sentence's character position
+        adjacency: (N × N) normalized adjacency matrix where N is the number of sentences.
+            Each entry (i,j) represents the normalized similarity between sentences i and j.
+            This matrix is returned to the frontend, which raises it to a power and computes
+            the final salience scores via random walk simulation.
+    """
+    sentences, sentence_ranges = get_sentences(source_text)
+    adjacency = text_rank(sentences, model_name)
+    return sentence_ranges, adjacency
+
+
+# =============================================================================
+# Unused/Debugging Code
+# =============================================================================
 
 def terminal_distr(adjacency, initial=None):
     sample = initial if initial is not None else torch.full((adjacency.shape[0],), 1.)
     scores = sample.matmul(torch.matrix_power(adjacency, 10)).numpy().tolist()
     return scores
 
-def extract(source_text, model_name='all-mpnet-base-v2'):
-    sentences, sentence_ranges = get_sentences(source_text)
-    adjacency = text_rank(sentences, model_name)
-    return sentence_ranges, adjacency
-
 def get_results(sentences, adjacency):
     scores = terminal_distr(adjacency)
     for score, sentence in sorted(zip(scores, sentences), key=lambda xs: xs[0]):
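Per the extract docstring, the frontend raises this matrix to a power and simulates a random walk; terminal_distr above is the server-side version of the same step. Below is a minimal NumPy sketch of that computation, with a made-up 3×3 row-normalized matrix standing in for the real 'adjacency' payload.

import numpy as np

# Toy row-normalized adjacency; in practice this is the 'adjacency'
# field of the /salience response (an N x N nested list).
adjacency = np.array([
    [0.2, 0.5, 0.3],
    [0.4, 0.2, 0.4],
    [0.3, 0.3, 0.4],
])

# Put uniform mass on every sentence and take 10 walk steps, mirroring
# sample.matmul(torch.matrix_power(adjacency, 10)) in terminal_distr.
initial = np.full(adjacency.shape[0], 1.0)
scores = initial @ np.linalg.matrix_power(adjacency, 10)

# Sentences that accumulate more probability mass are more salient.
print(scores.tolist())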