Source code for tests.system.weaviate.example_weaviate_openai
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsimportjsonfrompathlibimportPathimportpendulumfromairflow.decoratorsimportdag,setup,task,teardownfromairflow.providers.openai.operators.openaiimportOpenAIEmbeddingOperatorfromairflow.providers.weaviate.hooks.weaviateimportWeaviateHookfromairflow.providers.weaviate.operators.weaviateimportWeaviateIngestOperator
def example_weaviate_openai():
    """
    Example DAG which creates embeddings using OpenAIEmbeddingOperator and then uses
    WeaviateIngestOperator to insert the embeddings into Weaviate.

    The tasks are chained as: create collection -> embed every question ->
    attach the vectors to the JSON records -> ingest into Weaviate ->
    embed a query string -> query Weaviate by vector -> delete collection.

    ``COLLECTION_NAME`` is a module-level name defined outside this function.
    """

    @setup
    @task
    def create_weaviate_collection():
        """
        Example task to create collection without any Vectorizer.

        You're expected to provide custom vectors for your data.
        """
        weaviate_hook = WeaviateHook()
        # Only the collection name is supplied. Weaviate's autoschema feature
        # will infer properties when importing.
        weaviate_hook.create_collection(COLLECTION_NAME)

    @setup
    @task
    def get_data_to_embed():
        """Return the ``Question`` text of every record in the JSON data file."""
        # Use a context manager so the file handle is closed deterministically.
        with Path("jeopardy_data_without_vectors.json").open() as data_file:
            data = json.load(data_file)
        return [item["Question"] for item in data]

    data_to_embed = get_data_to_embed()

    # One mapped embedding task instance is expanded per question returned above.
    embed_data = OpenAIEmbeddingOperator.partial(
        task_id="embedding_using_xcom_data",
        conn_id="openai_default",
        model="text-embedding-ada-002",
    ).expand(input_text=data_to_embed["return_value"])

    @task
    def update_vector_data_in_json(**kwargs):
        """Return the JSON records with each embedding stored under ``Vector``."""
        ti = kwargs["ti"]
        with Path("jeopardy_data_without_vectors.json").open() as data_file:
            data = json.load(data_file)
        embedded_data = ti.xcom_pull(task_ids="embedding_using_xcom_data", key="return_value")
        # Embeddings are matched to records by position in the pulled list.
        for i, vector in enumerate(embedded_data):
            data[i]["Vector"] = vector
        return data

    # Keep the task output in its own name instead of rebinding (and shadowing)
    # the task function; the task id is still "update_vector_data_in_json".
    vectorized_data = update_vector_data_in_json()

    perform_ingestion = WeaviateIngestOperator(
        task_id="perform_ingestion",
        conn_id="weaviate_default",
        collection_name=COLLECTION_NAME,
        input_data=vectorized_data["return_value"],
    )

    embed_query = OpenAIEmbeddingOperator(
        task_id="embed_query",
        conn_id="openai_default",
        input_text="biology",
        model="text-embedding-ada-002",
    )

    @task
    def query_weaviate(**kwargs):
        """Query the collection with the embedded query and check the top hit."""
        ti = kwargs["ti"]
        query_vector = ti.xcom_pull(task_ids="embed_query", key="return_value")
        weaviate_hook = WeaviateHook()
        properties = ["question", "answer", "category"]
        response = weaviate_hook.query_with_vector(query_vector, COLLECTION_NAME, properties)
        # The assertion is the check performed by this example: the first
        # returned object must contain the expected question text.
        assert "In 1953 Watson & Crick built a model" in response.objects[0].properties["question"]

    @teardown
    @task
    def delete_weaviate_collection():
        """
        Example task to delete a weaviate collection
        """
        weaviate_hook = WeaviateHook()
        # Remove the collection that create_weaviate_collection created.
        weaviate_hook.delete_collections([COLLECTION_NAME])

    (
        create_weaviate_collection()
        >> embed_data
        >> vectorized_data
        >> perform_ingestion
        >> embed_query
        >> query_weaviate()
        >> delete_weaviate_collection()
    )
# Call the DAG-defining function when this module is imported.
example_weaviate_openai()

# Imported after the call above rather than at the top of the file, hence the E402 suppression.
from tests_common.test_utils.system_tests import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)