diff --git a/frontend/src/components/breadcrumb/breadcrumb.tsx b/frontend/src/components/breadcrumb/breadcrumb.tsx new file mode 100644 index 0000000..0dbdeec --- /dev/null +++ b/frontend/src/components/breadcrumb/breadcrumb.tsx @@ -0,0 +1,50 @@ +import { component$ } from "@builder.io/qwik"; + +interface BreadcrumbProps { + current: string; +} + +export const Breadcrumb = component$(({ current }) => { + return ( + + ); +}); diff --git a/frontend/src/components/debug-panel/debug-panel.tsx b/frontend/src/components/debug-panel/debug-panel.tsx index 34f4ac5..35eaab7 100644 --- a/frontend/src/components/debug-panel/debug-panel.tsx +++ b/frontend/src/components/debug-panel/debug-panel.tsx @@ -119,14 +119,14 @@ export const DebugPanel = component$( {/* Debug panel */} {isOpen.value && ( -
+

Debug: Salience Score Breakdown

{/* Controls */}
-
+
*:not(.editor) { + padding-left: 15px; + padding-right: 15px; + } + .editor { + box-shadow: none; + } +} diff --git a/frontend/src/routes/about/index.mdx b/frontend/src/routes/about/index.mdx index f2f47c4..75290a7 100644 --- a/frontend/src/routes/about/index.mdx +++ b/frontend/src/routes/about/index.mdx @@ -3,6 +3,9 @@ title: How Salience Works --- import { Math } from "~/components/math/math" +import { Breadcrumb } from "~/components/breadcrumb/breadcrumb" + +← Back to Demo # How Salience Works @@ -12,22 +15,49 @@ was quite neat how someone well armed with math can take sentence embeddings and determine the significance of all sentences in a document in fewer lines of code than my introduction paragraph here. -This is not a description of [all the changes I made and extra book-keeping involved to turn Matt's script into a proper web app demo](/grunt-work). +Salience highlights important sentences by treating your document as a graph +where sentences that talk about similar things are connected. We then figure +out which sentences are most "central" to the document's themes. -This post is an outsider's view of how Matt's salience code works. If you're -already working with ML models in Python, this will feel torturously detailed. +**This is not** a description of [all the changes I made and extra book-keeping +involved to turn Matt's script into a proper web app demo](./grunt-work). -My interest in this overly detailed style is, the equations a ML engineer would doodle out, the element by element matrix operations to give you feel for the dataflow, and the numpy code that implements it. +Warning! This post is an outsider's view of how Matt's salience code works. If +you're already working with ML models in Python, this will feel torturously +detailed. -When you see `sims /= norms.T` in numpy, I want to explain the matrix dimensions +I'm thinking that for someone fluent in this stuff, they see a lot more than +just Matt's few dozen lines of code. 
They'll see the underlying equation, the +shape of the matrices, how the data is laid out in memory, and **crucially** +the alternatives. They see this is a graph problem. And then they're flipping +through a mental catalog: random walks, spectral decomposition, diffusion, +flow-based methods. Asking which one applies, what assumptions each makes, and +whether the data is close enough to satisfy them. If not, maybe you +approximate, linearize, symmetrize, or threshold until it does. Knowing the +toolkit and knowing when to bend the rules. All of this is available to them +effortlessly, automatically, without conscious thought. +I remember when I first learned Haskell. Reading the code was slow! I had to +think quite a bit about what I was looking at. Then after about a month, +something clicked. I could suddenly read Haskell like English or C++. The +translation became effortless, almost invisible. -I wrote this for the rest of us old world programmers: compilers, networking, systems programming looking at -C++/Go/Rust, or the poor souls in the frontend Typescript mines. -For us refugees of the barbarian past, the tooling and notation can look foreign. I wanted to walk through the math and -numpy operations in detail to show what's actually happening with the data. +I would bet my last donut the same thing can happen to you with numpy and +ML papers. At some point, fluency kicks in. You will read the equations an ML +engineer would doodle out, instinctively have a feel for the dataflow, the +element by element matrix operations under the hood, while simultaneously +seeing in your mind's eye the equivalent high level numpy code. -Salience highlights important sentences by treating your document as a graph where sentences that talk about similar things are connected. We then figure out which sentences are most "central" to the document's themes. 
+Today I'm going to show you the equation, the matching source code, and the +alternative theorems/algorithms/tricks you could have deployed at each step. +I'll explain things that will seem painfully obvious to experts: *this is a +matrix multiplication—how many rows? how many columns? what's the shape of the +output?* That level of detail. + +I'm essentially narrating the day-jobbers' automatic, subconscious +processes. I hope laying out all the alternate forms (showing the choices, the +reasons, the mental links between code and math) brings you one step closer to +fluency. ## Step 1: Break Text into Sentences @@ -265,4 +295,4 @@ For each document, I use a simple 1D solver to find and ← Back to Demo diff --git a/frontend/src/routes/index.tsx b/frontend/src/routes/index.tsx index 7e260ab..eab9379 100644 --- a/frontend/src/routes/index.tsx +++ b/frontend/src/routes/index.tsx @@ -319,8 +319,8 @@ export default component$(() => { sentence highlights based on their significance to the document - How it works → + How it works →