You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# GraphRAG pipeline configuration (settings.yaml).
# Reconstructed into valid block-style YAML — the original paste had all
# newlines stripped, fusing keys, values, and comments onto single lines.

encoding_model: cl100k_base
skip_workflows: []

llm:
  api_key: ${GRAPHRAG_CHAT_API_KEY}
  type: openai_chat # or azure_openai_chat
  model: ${GRAPHRAG_CHAT_MODEL}
  model_supports_json: false # recommended if this is available for your model.
  # audience: "https://cognitiveservices.azure.com/.default"
  # max_tokens: 4000
  # request_timeout: 180.0
  api_base: ${GRAPHRAG_API_BASE}
  # api_version: 2024-02-15-preview
  # organization: <organization_id>
  # deployment_name: <azure_model_deployment_name>
  # tokens_per_minute: 150_000 # set a leaky bucket throttle
  # requests_per_minute: 10_000 # set a leaky bucket throttle
  # max_retries: 10
  # max_retry_wait: 10.0
  # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
  # concurrent_requests: 25 # the number of parallel inflight requests that may be made
  # temperature: 1 # temperature for sampling
  # top_p: 0.8 # top-p sampling
  # n: 1 # Number of completions to generate

parallelization:
  stagger: 0.3
  # num_threads: 50 # the number of threads to use for parallel processing

async_mode: threaded # or asyncio

embeddings:
  ## parallelization: override the global parallelization settings for embeddings
  async_mode: threaded # or asyncio
  # target: required # or all
  # batch_size: 16 # the number of documents to send in a single request
  # batch_max_tokens: 8191 # the maximum number of tokens to send in a single request
  vector_store:
    type: lancedb
    db_uri: 'output/lancedb'
    container_name: default # A prefix for the vector store to create embedding containers. Default: 'default'.
    overwrite: true
  # vector_store: # configuration for AI Search
    # type: azure_ai_search
    # url: <ai_search_endpoint>
    # api_key: <api_key> # if not set, will attempt to use managed identity. Expects the `Search Index Data Contributor` RBAC role in this case.
    # audience: <optional> # if using managed identity, the audience to use for the token
    # overwrite: true # or false. Only applicable at index creation time
    # container_name: default # A prefix for the AzureAISearch to create indexes. Default: 'default'.
  llm:
    api_key: ${GRAPHRAG_CHAT_API_KEY}
    type: openai_embedding # or azure_openai_embedding
    model: text-embedding-3-small
    api_base: ${GRAPHRAG_API_BASE}
    # api_version: 2024-02-15-preview
    # audience: "https://cognitiveservices.azure.com/.default"
    # organization: <organization_id>
    # deployment_name: <azure_model_deployment_name>
    # tokens_per_minute: 150_000 # set a leaky bucket throttle
    # requests_per_minute: 10_000 # set a leaky bucket throttle
    # max_retries: 10
    # max_retry_wait: 10.0
    # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
    # concurrent_requests: 25 # the number of parallel inflight requests that may be made

chunks:
  size: 600
  overlap: 50
  group_by_columns: [id] # by default, we don't allow chunks to cross documents

input:
  type: file # or blob
  file_type: text # or csv
  base_dir: "input"
  file_encoding: utf-8
  file_pattern: ".*\\.txt$"

cache:
  type: file # or blob
  base_dir: "cache"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

storage:
  type: file # or blob
  base_dir: "output"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

# update_index_storage: # Storage to save an updated index (for incremental indexing). Enabling this performs an incremental index run
  # type: file # or blob
  # base_dir: "update_output"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

reporting:
  type: file # or console, blob
  base_dir: "logs"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

entity_extraction:
  ## strategy: fully override the entity extraction strategy.
  ##   type: one of graph_intelligence, graph_intelligence_json and nltk
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/entity_extraction.txt"
  entity_types: [业务办理项编码, 实施主体, 事项名称, 实施主体编码, 权力来源]
  max_gleanings: 1

summarize_descriptions:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/summarize_descriptions.txt"
  max_length: 200

claim_extraction:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  enabled: true
  prompt: "prompts/claim_extraction.txt"
  description: "Any claims or facts that could be relevant to information discovery."
  max_gleanings: 1

community_reports:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/community_report.txt"
  max_length: 1000
  max_input_length: 2000

cluster_graph:
  max_cluster_size: 10

embed_graph:
  enabled: false # if true, will generate node2vec embeddings for nodes
  # num_walks: 10
  # walk_length: 40
  # window_size: 2
  # iterations: 3
  # random_seed: 597832

umap:
  enabled: false # if true, will generate UMAP embeddings for nodes

snapshots:
  graphml: false
  raw_entities: false
  top_level_nodes: false

local_search:
  # text_unit_prop: 0.5
  # community_prop: 0.1
  # conversation_history_max_turns: 5
  # top_k_mapped_entities: 10
  # top_k_relationships: 10
  # llm_temperature: 0 # temperature for sampling
  # llm_top_p: 1 # top-p sampling
  # llm_n: 1 # Number of completions to generate
  # max_tokens: 12000

global_search:
  # llm_temperature: 0 # temperature for sampling
  # llm_top_p: 1 # top-p sampling
  # llm_n: 1 # Number of completions to generate
  # max_tokens: 12000
  # data_max_tokens: 12000
  # map_max_tokens: 1000
  # reduce_max_tokens: 2000
  # concurrency: 32
Logs and screenshots
No response
Additional Information
GraphRAG Version:
Operating System:
Python Version:
Related Issues:
The text was updated successfully, but these errors were encountered:
Redhair957
added
the
triage
Default label assignment, indicates new issue needs reviewed by a maintainer
label
Jan 9, 2025
Do you need to file an issue?
Describe the issue
⠸ GraphRAG Indexer
├── Loading Input (text) - 1 files loaded (0 filtered) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 0:00:00
├── create_base_text_units
├── create_final_documents
└── create_base_entity_graph
Steps to reproduce
No response
GraphRAG Config Used
Logs and screenshots
No response
Additional Information
The text was updated successfully, but these errors were encountered: