wbrooks's picture
add the rank-combined column before using it
861d14f
raw
history blame
1.58 kB
from fastapi import FastAPI, Request, Query
from fastapi.responses import JSONResponse
from src.do_pca_on_tfidf import query_docs
from src.search_embeddings import sbert_query_docs
import polars as pl
#from jinja2 import Template
# Leading directory string that the /search handler strips from every "file"
# value before returning results.
# NOTE(review): this is an absolute path on one developer's machine; results
# whose "file" does not start with it are returned unchanged by strip_prefix.
# Consider making it configurable.
path_prefix = "/Users/wes/Google Drive/Shared drives/datalab/projects/2025_coul_aisearch/data/original_box_download/"
# FastAPI application instance; the @app.get(...) decorators in this module
# register their handlers on it.
app = FastAPI()
@app.get("/")
def default():
    """Return a fixed status payload containing "ok" and the version 0.1."""
    payload = {"status": "ok", "version": 0.1}
    return payload
@app.get("/sbert")
def sb(query: str):
    """Run the SBERT search for ``query`` and return its result as text.

    Args:
        query: Query string forwarded unchanged to ``sbert_query_docs``.

    Returns:
        A dict with a single ``"scores"`` key holding ``str()`` of whatever
        ``sbert_query_docs`` returned.
    """
    # The result is stringified rather than serialised field by field.
    return {"scores": str(sbert_query_docs(query))}
@app.get("/tfidf")
def tf(query: str):
    """Run the TF-IDF search for ``query`` and return its result as text.

    Args:
        query: Query string forwarded unchanged to ``query_docs``.

    Returns:
        A dict with a single ``"scores"`` key holding ``str()`` of whatever
        ``query_docs`` returned.
    """
    # The result is stringified rather than serialised field by field.
    return {"scores": str(query_docs(query))}
@app.get("/square")
def square(x: int):
    """Return ``x`` multiplied by itself under the ``"result"`` key.

    Args:
        x: Integer to square.
    """
    squared = x * x
    return {"result": squared}
@app.get("/search", response_class=JSONResponse)
def search(q: str = Query(..., description="Search query")):
    """Rank documents for ``q`` by blending the SBERT and TF-IDF results.

    Both retrievers are queried with ``q`` and their results are inner-joined
    on ``file``, so only documents present in both result sets survive. Each
    remaining row gets
    ``rank-combined = 0.7 * rank-sbert + 0.3 * rank-tfidf``; rows are sorted
    by that value, smallest first, and assigned
    ``confidence = round(20.0 / rank-combined, 2)``.

    Args:
        q: Required search query string.

    Returns:
        A list of ``{"file": ..., "confidence": ...}`` dicts in sorted order,
        with ``path_prefix`` removed from the start of each ``file`` value.
    """
    # NOTE(review): assumes both helpers return Polars DataFrames exposing a
    # "file" column plus the "rank-sbert" / "rank-tfidf" columns used below,
    # and that ranks are never 0 (a zero rank would make the confidence
    # non-finite) -- confirm against the helper modules.
    tfidf_hits = query_docs(q)
    sbert_hits = sbert_query_docs(q)

    # Column expressions, named up front so the pipeline below reads as steps.
    blended_rank = 0.7 * pl.col("rank-sbert") + 0.3 * pl.col("rank-tfidf")
    relative_file = pl.col("file").str.strip_prefix(path_prefix)
    confidence = (20.0 / pl.col("rank-combined")).round(2)

    merged = sbert_hits.join(tfidf_hits, on="file", how="inner")
    # "rank-combined" has to exist as a column before it can be sorted on or
    # used to derive the confidence.
    scored = merged.with_columns(
        blended_rank.alias("rank-combined"),
        relative_file.alias("file"),
    )
    ranked = scored.sort("rank-combined")
    ranked = ranked.with_columns(confidence.alias("confidence"))
    return ranked.select(["file", "confidence"]).to_dicts()
@app.get("/test")
def echo(query: str):
    """Return the received ``query`` string unchanged under the ``"echo"`` key.

    Args:
        query: Value to echo back.
    """
    response = {"echo": query}
    return response