diff --git a/packages/dbgpt-app/src/dbgpt_app/scene/chat_db/auto_execute/chat.py b/packages/dbgpt-app/src/dbgpt_app/scene/chat_db/auto_execute/chat.py
index c689ede0ee..255144bdad 100644
--- a/packages/dbgpt-app/src/dbgpt_app/scene/chat_db/auto_execute/chat.py
+++ b/packages/dbgpt-app/src/dbgpt_app/scene/chat_db/auto_execute/chat.py
@@ -66,6 +66,10 @@ async def generate_input_values(self) -> Dict:
                     user_input,
                     self.curr_config.schema_retrieve_top_k,
                 )
+                logger.info(
+                    "[RAG-NL2SQL] Retrieved table info count: %d", len(table_infos)
+                )
+                logger.debug("[RAG-NL2SQL] Retrieved table info: %s", table_infos)
         except Exception as e:
             logger.error(f"Retrieved table info error: {str(e)}")
             table_infos = await blocking_func_to_async(
diff --git a/packages/dbgpt-ext/src/dbgpt_ext/storage/vector_store/chroma_store.py b/packages/dbgpt-ext/src/dbgpt_ext/storage/vector_store/chroma_store.py
index 1c175b81bc..46b5f24b4f 100644
--- a/packages/dbgpt-ext/src/dbgpt_ext/storage/vector_store/chroma_store.py
+++ b/packages/dbgpt-ext/src/dbgpt_ext/storage/vector_store/chroma_store.py
@@ -149,11 +149,37 @@ def get_config(self) -> ChromaVectorConfig:
         return self._vector_store_config
 
     def create_collection(self, collection_name: str, **kwargs) -> Any:
-        return self._chroma_client.get_or_create_collection(
+        """Get or create a collection, enforcing the requested distance metric.
+
+        When ``collection_metadata`` asks for an ``hnsw:space`` that an existing
+        collection does not have, that collection is deleted and recreated, so
+        everything stored in it is lost and has to be indexed again.
+        """
+        collection_metadata = kwargs.get("collection_metadata")
+        collection = self._chroma_client.get_or_create_collection(
             name=collection_name,
             embedding_function=None,
-            metadata=kwargs.get("collection_metadata"),
+            metadata=collection_metadata,
         )
+        # A collection created without the requested distance function keeps
+        # the default L2 distance; score = 1 - distance can then be negative and
+        # results get filtered out, hence the delete-and-recreate below.
+        expected_space = (collection_metadata or {}).get("hnsw:space")
+        current_space = (collection.metadata or {}).get("hnsw:space")
+        if expected_space and current_space != expected_space:
+            logger.warning(
+                f"Collection '{collection_name}' exists but has incorrect metadata "
+                f"(current: {collection.metadata}, expected: {collection_metadata}). "
+                f"Deleting and recreating it drops its {collection.count()} "
+                "stored embeddings."
+            )
+            self._chroma_client.delete_collection(collection_name)
+            collection = self._chroma_client.get_or_create_collection(
+                name=collection_name,
+                embedding_function=None,
+                metadata=collection_metadata,
+            )
+        return collection
 
     def similar_search(
         self, text, topk, filters: Optional[MetadataFilters] = None
@@ -286,7 +312,11 @@ def delete_vector_name(self, vector_name: str):
         try:
             # Check if collection exists first
             collections = self._chroma_client.list_collections()
-            collection_exists = self._collection.name in collections
+            # Entries may be collection objects or bare names; compare by name.
+            collection_names = {
+                c.name if hasattr(c, "name") else str(c) for c in collections
+            }
+            collection_exists = self._collection.name in collection_names
 
             if not collection_exists:
                 logger.warning(
@@ -295,8 +325,18 @@ def delete_vector_name(self, vector_name: str):
                 return True
 
             # Delete collection if it exists
-            self._chroma_client.delete_collection(self._collection.name)
+            deleted_name = self._collection.name
+            # Keep the collection's own metadata so a non-default distance
+            # function survives the re-creation; fall back to cosine.
+            collection_metadata = self._collection.metadata or {"hnsw:space": "cosine"}
+            self._chroma_client.delete_collection(deleted_name)
             SharedSystemClient.clear_system_cache()
+            # Re-create the collection so self._collection points to a valid object
+            self._collection = self._chroma_client.get_or_create_collection(
+                name=deleted_name,
+                embedding_function=None,
+                metadata=collection_metadata,
+            )
             return True
 
         except Exception as e: