Spaces:

wbrooks
/

CoUL-document-search

Running

wbrooks committed 1 day ago

Commit

bccb1fa

1 Parent(s): b23d55c

making sure all the correct arguments are there in the function calls

Files changed (1) hide show

src/tfidf_search.py CHANGED Viewed

@@ -55,7 +55,7 @@ def query_worker(query: str, rownames: list[str], fasttext_model: fasttext.FastT
-def query_factory(rownames: list[str], dtm_svd: NDArray[np.float64], dtm_svd_mat: NDArray[np.float64], idf: NDArray[np.float64], vocab_norm: NDArray[np.float64], concentration: float = 10) -> Callable[[str], pl.DataFrame]:
     """
     Create a function that will compare query text to the documents in the corpus.
@@ -73,7 +73,7 @@ def query_factory(rownames: list[str], dtm_svd: NDArray[np.float64], dtm_svd_mat
         Returns:
             polars.DataFrame: Results sorted so that the best matches (according to column `score-tfidf`) are listed first.
         """
-        return query_worker(query, rownames, dtm_svd, dtm_svd_mat, vocab_norm, concentration)
     return do_query
@@ -120,4 +120,4 @@ def create_tfidf_search_function(dtm_df_path: str, vectorizer_path: str, model_n
     dtm_svd = TruncatedSVD(n_components=300)
     X_svd = dtm_svd.fit_transform(doc_term_mat)
-    return query_factory(rownames = filenames, dtm_svd = dtm_svd, dtm_svd_mat = X_svd, vocab_norm=vocab_norm, idf = my_idf, concentration = 30)

+def query_factory(rownames: list[str], fasttext_model: fasttext.FastText._FastText, idf: NDArray[np.float64], dtm_svd: NDArray[np.float64], dtm_svd_mat: NDArray[np.float64], vocab_norm: NDArray[np.float64], concentration: float = 10) -> Callable[[str], pl.DataFrame]:
     """
     Create a function that will compare query text to the documents in the corpus.
         Returns:
             polars.DataFrame: Results sorted so that the best matches (according to column `score-tfidf`) are listed first.
         """
+        return query_worker(query, rownames, fasttext_model, idf, dtm_svd, dtm_svd_mat, vocab_norm, concentration)
     return do_query
     dtm_svd = TruncatedSVD(n_components=300)
     X_svd = dtm_svd.fit_transform(doc_term_mat)
+    return query_factory(rownames = filenames, fasttext_model = fasttext_model, idf = my_idf, dtm_svd = dtm_svd, dtm_svd_mat = X_svd, vocab_norm=vocab_norm, concentration = 30)