1- import { sql } from '@vercel/postgres' ;
2- import { NextResponse } from 'next/server' ;
1+ import { sql } from "@vercel/postgres" ;
2+ import { NextResponse } from "next/server" ;
3+ import { computeUMAP , normalizePositions } from "@/utils/umapUtils" ;
34
4- export async function GET ( ) {
5- try {
6- console . log ( 'Fetching embeddings data for visualization...' ) ;
/**
 * Shape of one row returned by the `content_chunks` query in `GET`
 * (used as the type argument of the `sql` tagged template).
 * Property names mirror the selected column names, hence snake_case.
 */
interface ChunkRow {
  /** Row identifier. */
  id: string;

  /** Post slug; the query keeps one row per distinct slug. */
  post_slug: string;

  /** Title of the post the chunk belongs to. */
  post_title: string;

  /** Text content of the chunk. */
  content: string;

  /** Chunk kind; the query orders `'full-post'` rows ahead of all others. */
  chunk_type: string;

  /** Optional per-post metadata read by the handler. */
  metadata: {
    published_date?: string;
    tags?: Array<string>;
  };

  /** Secondary ordering key used by the query within a post. */
  sequence: number;

  /**
   * Stored embedding in whatever representation the driver returns.
   * Deliberately `unknown`: `parseEmbedding` converts it to `number[]`.
   */
  embedding: unknown;

  /** Creation timestamp, passed through to the response as-is. */
  created_at: string;
}
719
8- // First, let's see how many total chunks we have
9- const totalChunks = await sql `
10- SELECT COUNT(*) as count FROM content_chunks WHERE embedding IS NOT NULL
11- ` ;
12- console . log ( `Total chunks with embeddings: ${ totalChunks . rows [ 0 ] . count } ` ) ;
/**
 * One article as returned in the `data` array of the `GET` response:
 * the camelCase projection of a `ChunkRow` plus its 2D layout position.
 */
interface ArticleData {
  // --- Fields copied from the database row ---
  id: string;
  postSlug: string;
  postTitle: string;
  content: string;
  chunkType: string;
  metadata: ChunkRow["metadata"];
  sequence: number;

  /** Embedding after `parseEmbedding`; rows with an empty vector are filtered out. */
  embedding: number[];

  // --- Convenience fields lifted out of `metadata` ---
  /** `metadata.published_date`, when present. */
  publishedDate?: string;
  /** `metadata.tags`, or an empty array when the row has none. */
  tags: string[];

  createdAt: string;

  /** Position of the row in the query result, assigned before invalid embeddings are filtered. */
  index: number;

  // --- Layout coordinates taken from the normalized UMAP positions ---
  x: number;
  y: number;
}
1336
14- // Count unique articles
15- const uniqueArticles = await sql `
16- SELECT COUNT(DISTINCT post_slug) as count FROM content_chunks WHERE embedding IS NOT NULL
17- ` ;
18- console . log ( `Unique articles with embeddings: ${ uniqueArticles . rows [ 0 ] . count } ` ) ;
/**
 * Converts a stored embedding into a plain numeric vector.
 *
 * Accepted inputs:
 * - an array of numbers (or numeric strings);
 * - a string holding a JSON array, e.g. `"[0.1,0.2]"`;
 * - a string holding a comma-separated list wrapped in `[]` or `{}`
 *   that is not valid JSON (fallback path).
 *
 * @param embedding - Raw value read from the `embedding` column.
 * @returns The vector, or `[]` when the input is missing, empty, or contains
 *   any component that is not a finite number. Callers treat an empty
 *   array as "no usable embedding".
 */
const parseEmbedding = (embedding: unknown): number[] => {
  // Coerce each component to a number and reject the whole vector if any
  // component is not finite, so NaN never reaches the layout computation.
  const toVector = (values: readonly unknown[]): number[] => {
    const vector = values.map((value) => {
      if (typeof value === "number") {
        return value;
      }
      // Number("") is 0, so blank tokens must be rejected explicitly.
      if (typeof value === "string" && value.trim() !== "") {
        return Number(value);
      }
      return Number.NaN;
    });
    return vector.every((component) => Number.isFinite(component))
      ? vector
      : [];
  };

  if (Array.isArray(embedding)) {
    return toVector(embedding);
  }

  if (typeof embedding !== "string") {
    return [];
  }

  const text = embedding.trim();
  if (text === "") {
    return [];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Not JSON: strip surrounding delimiters and split on commas.
    const inner = text.replace(/[\[\]{}]/g, "").trim();
    return inner === "" ? [] : toVector(inner.split(","));
  }

  // Valid JSON that is not an array (e.g. "42" or "{}") is not a vector.
  return Array.isArray(parsed) ? toVector(parsed) : [];
};
57+
58+ export async function GET ( request : Request ) {
59+ try {
60+ const { searchParams } = new URL ( request . url ) ;
61+ const nNeighbors = parseInt ( searchParams . get ( "neighbors" ) || "8" ) ;
62+ const minDist = parseFloat ( searchParams . get ( "minDist" ) || "0.05" ) ;
63+ const spread = parseFloat ( searchParams . get ( "spread" ) || "2.0" ) ;
1964
20- // Simplified query - get one embedding per article
21- const results = await sql `
65+ const results = await sql < ChunkRow > `
2266 SELECT DISTINCT ON (post_slug)
2367 id,
2468 post_slug,
@@ -31,78 +75,60 @@ export async function GET() {
3175 created_at
3276 FROM content_chunks
3377 WHERE embedding IS NOT NULL
34- ORDER BY
78+ ORDER BY
3579 post_slug,
3680 CASE WHEN chunk_type = 'full-post' THEN 0 ELSE 1 END,
3781 sequence
3882 ` ;
3983
40- console . log ( `Found ${ results . rows . length } articles with embeddings` ) ;
41-
42- // Helper function to parse embedding
43- const parseEmbedding = ( embedding : any ) : number [ ] => {
44- if ( Array . isArray ( embedding ) ) {
45- return embedding ;
46- }
47-
48- if ( typeof embedding === 'string' ) {
49- try {
50- // Try to parse as JSON array
51- const parsed = JSON . parse ( embedding ) ;
52- if ( Array . isArray ( parsed ) ) {
53- return parsed ;
54- }
55- } catch ( e ) {
56- // If not JSON, try to parse as PostgreSQL vector format
57- // Remove brackets and split by comma
58- const cleaned = embedding . replace ( / [ \[ \] ] / g, '' ) ;
59- return cleaned . split ( ',' ) . map ( Number ) ;
60- }
61- }
62-
63- console . warn ( 'Could not parse embedding:' , typeof embedding , embedding ) ;
64- return [ ] ;
65- } ;
66-
67- // Process the data for visualization
68- const processedData = results . rows . map ( ( row , index ) => {
69- const parsedEmbedding = parseEmbedding ( row . embedding ) ;
70-
71- return {
84+ const parsedData = results . rows
85+ . map ( ( row , index ) => ( {
7286 id : row . id ,
7387 postSlug : row . post_slug ,
7488 postTitle : row . post_title ,
7589 content : row . content ,
7690 chunkType : row . chunk_type ,
7791 metadata : row . metadata ,
7892 sequence : row . sequence ,
79- embedding : parsedEmbedding ,
93+ embedding : parseEmbedding ( row . embedding ) ,
8094 publishedDate : row . metadata ?. published_date ,
8195 tags : row . metadata ?. tags || [ ] ,
8296 createdAt : row . created_at ,
83- index : index
84- } ;
85- } ) . filter ( item => item . embedding . length > 0 ) ; // Filter out items with invalid embeddings
97+ index,
98+ } ) )
99+ . filter ( ( item ) => item . embedding . length > 0 ) ;
86100
87- console . log ( `Processed ${ processedData . length } articles with valid embeddings` ) ;
101+ const embeddings = parsedData . map ( ( item ) => item . embedding ) ;
102+ const umapPositions = computeUMAP ( embeddings , {
103+ nNeighbors : Math . min ( nNeighbors , parsedData . length - 1 ) ,
104+ minDist,
105+ spread,
106+ } ) ;
107+
108+ const normalizedPositions = normalizePositions (
109+ umapPositions ,
110+ 1000 ,
111+ 1000 ,
112+ 50
113+ ) ;
114+
115+ const processedData : ArticleData [ ] = parsedData . map ( ( item , index ) => ( {
116+ ...item ,
117+ x : normalizedPositions [ index ] . x ,
118+ y : normalizedPositions [ index ] . y ,
119+ } ) ) ;
88120
89121 return NextResponse . json ( {
90122 success : true ,
91123 data : processedData ,
92124 count : processedData . length ,
93- debug : {
94- totalChunks : totalChunks . rows [ 0 ] . count ,
95- uniqueArticles : uniqueArticles . rows [ 0 ] . count ,
96- returnedArticles : processedData . length
97- }
98125 } ) ;
99-
100126 } catch ( error ) {
101- console . error ( ' Error fetching embeddings:' , error ) ;
127+ console . error ( " Error fetching embeddings:" , error ) ;
102128 return NextResponse . json (
103- {
104- error : ' Failed to fetch embeddings data' ,
105- details : error instanceof Error ? error . message : ' Unknown error'
129+ {
130+ error : " Failed to fetch embeddings data" ,
131+ details : error instanceof Error ? error . message : " Unknown error" ,
106132 } ,
107133 { status : 500 }
108134 ) ;
0 commit comments