Spaces:

wbrooks
/

CoUL-document-search

Sleeping

wbrooks committed 3 days ago

Commit

bd1c23b

1 Parent(s): edfee12

the factory has to actually return the function

Files changed (1) hide show

src/embeddings_search.py CHANGED Viewed

@@ -50,6 +50,7 @@ def sbert_query_factory(corpus_embeddings_df: pl.DataFrame, model: SentenceTrans
     Returns:
         Callable[[str], pl.DataFrame]: Function to compare the query string to the corpus and return results sorted by the cosine similarity.
     """
     def do_sbert_query(query: str) -> pl.DataFrame:
         """
@@ -61,8 +62,9 @@ def sbert_query_factory(corpus_embeddings_df: pl.DataFrame, model: SentenceTrans
         Returns:
             polars.DataFrame: Corpus documents ranked by their match to the query.
         """
-        search_fun = sbert_query(query, corpus_embeddings_df, model)
-        return search_fun
 def load_embeddings_dfs(embeddings_dir: str = "block-embeddings") -> pl.DataFrame:
@@ -125,12 +127,13 @@ def create_embeddings_search_function_from_embeddings_df(model_name: str, embedd
         Callable[[str], pl.DataFrame]: Function to compare the query string to the corpus and return results sorted by the cosine similarity.
     """
     # Instantiate the sentence-transformer model:
     sentence_model = SentenceTransformer(model_name).to(device = device)
     # read the precomputed embeddings from the Parquet file
     block_embeddings_df = pl.read_parquet(embeddings_df_path)
     # call the factory to make the search function and return it
     return sbert_query_factory(corpus_embeddings_df = block_embeddings_df, model = sentence_model)

     Returns:
         Callable[[str], pl.DataFrame]: Function to compare the query string to the corpus and return results sorted by the cosine similarity.
     """
+    print("starting factory")
     def do_sbert_query(query: str) -> pl.DataFrame:
         """
         Returns:
             polars.DataFrame: Corpus documents ranked by their match to the query.
         """
+        return sbert_query(query, corpus_embeddings_df, model)
+    return do_sbert_query
 def load_embeddings_dfs(embeddings_dir: str = "block-embeddings") -> pl.DataFrame:
         Callable[[str], pl.DataFrame]: Function to compare the query string to the corpus and return results sorted by the cosine similarity.
     """
+    print("starting to build embeddings search")
     # Instantiate the sentence-transformer model:
     sentence_model = SentenceTransformer(model_name).to(device = device)
+    print("instantiated sentence-transformers model")
     # read the precomputed embeddings from the Parquet file
     block_embeddings_df = pl.read_parquet(embeddings_df_path)
+    print("read the embeddings to a data frame")
     # call the factory to make the search function and return it
     return sbert_query_factory(corpus_embeddings_df = block_embeddings_df, model = sentence_model)