pwcGraphRAG / settings.yaml
cordwainersmith
Add project files and Docker setup
c917d47
# Name of the token encoding applied by default across the pipeline.
encoding_model: "cl100k_base"

# Workflows to skip during indexing; empty list means nothing is skipped.
skip_workflows: []
# Default chat-completion model settings.
# Child keys must be indented under `llm`; at column 0 they become
# unrelated top-level keys and `llm` itself parses as null.
llm:
  # Placeholder for the GRAPHRAG_API_KEY environment variable, so the
  # secret itself is never committed to this file.
  api_key: ${GRAPHRAG_API_KEY}
  type: openai_chat
  model: gpt-4o-mini
  model_supports_json: true
  max_tokens: 4000
  temperature: 0
# Text-embedding settings: batching, vector store and embedding model.
embeddings:
  async_mode: threaded
  batch_size: 16

  # Where embedding vectors are persisted.
  vector_store:
    type: lancedb
    db_uri: "output/lancedb"
    container_name: default
    overwrite: true

  # Embedding model. Nested here on purpose: left at column 0 this key
  # would duplicate the top-level `llm` section and silently replace it.
  llm:
    api_key: ${GRAPHRAG_API_KEY}
    type: openai_embedding
    model: text-embedding-3-small
# How input documents are split into text units.
chunks:
  size: 500
  overlap: 50
  group_by_columns: [id]
# Source documents: plain-text files found under `input/`.
input:
  type: file
  file_type: text
  base_dir: "input"
  # Double-quoted, so `\\` is a single backslash: the regex is `.*\.txt$`.
  file_pattern: ".*\\.txt$"

  # NOTE(review): `recursive`, `source_tracking` and `processing_order` do
  # not appear in the documented GraphRAG input settings; presumably a
  # project-specific loader reads them -- confirm before relying on them.
  recursive: true
  source_tracking: true
  # Each entry is one mapping (path / priority / purpose); the `-` opens
  # the entry and the following keys must align under `path`.
  processing_order:
    - path: "first_paragraphs"
      priority: 1
      purpose: "graph_building"
    - path: "full_documents"
      priority: 2
      purpose: "retrieval"
# Entity extraction: prompt file plus the entity types to extract.
entity_extraction:
  prompt: "prompts/entity_extraction.txt"
  entity_types:
    # Baggage and its physical attributes
    - "Baggage Type"
    - "Dimension"
    - "Linear Dimension"
    - "Weight"
    - "Material Type"
    - "Wheel Configuration"
    - "Measurement Unit"
    - "Size Category"
    - "Weight Category"
    # Carrier, route and cabin context
    - "Airline"
    - "Alliance"
    - "Airport"
    - "Route Type"
    - "Travel Class"
    - "Cabin Section"
    - "Aircraft Type"
    # Rules, fees and allowances
    - "Restriction"
    - "Exemption"
    - "Policy"
    - "Fee Structure"
    - "Currency"
    - "Allowance"
    # Item categories
    - "Special Item"
    - "Prohibited Item"
    - "Restricted Item"
    - "Dangerous Good"
    - "Fragile Item"
    - "Valuable Item"
    # Documents, labels and tags
    - "Required Document"
    - "Label Type"
    - "Tag Category"
    # Services and handling
    - "Service Type"
    - "Handler Role"
    - "Service Location"
    - "Time Period"
    # Passengers
    - "Passenger Type"
    - "Membership Level"
    - "Group Category"
  max_gleanings: 2
  # NOTE(review): `source_filter` does not appear in the documented
  # GraphRAG entity_extraction settings; presumably custom -- confirm
  # that something actually consumes it.
  source_filter: "first_paragraphs"
# Claim extraction: enabled, with its own prompt and description.
claim_extraction:
  enabled: true
  # NOTE(review): `claim_types` does not appear in the documented GraphRAG
  # claim_extraction settings; presumably read by custom code or mirrored
  # in the prompt file -- confirm.
  claim_types:
    # Size and weight limits
    - "Basic Size Restriction"
    - "Oversize Condition"
    - "Weight Limit Standard"
    - "Overweight Condition"
    - "Combined Dimension Limit"
    - "Cabin Storage Requirement"
    # Fees
    - "Standard Fee"
    - "Excess Fee"
    - "Oversize Fee"
    - "Overweight Fee"
    - "Special Handling Fee"
    - "Season Surcharge"
    - "Route-Specific Fee"
    - "Multi-Piece Pricing"
    - "Fee Waiver Condition"
    # Allowances
    - "Basic Allowance"
    - "Class-Based Allowance"
    - "Status-Based Allowance"
    - "Route-Based Allowance"
    - "Special Group Allowance"
    - "Seasonal Allowance"
    - "Equipment Allowance"
    # Item restrictions and requirements
    - "Prohibited Item Policy"
    - "Restricted Item Condition"
    - "Dangerous Goods Policy"
    - "Special Item Restriction"
    - "Packaging Requirement"
    - "Declaration Requirement"
    # Handling and logistics
    - "Check-in Deadline"
    - "Special Handling Procedure"
    - "Priority Handling Rule"
    - "Transfer Handling Policy"
    - "Delivery Service Policy"
    - "Storage Policy"
    # Liability and claims
    - "Liability Limit"
    - "Insurance Requirement"
    - "Claim Procedure"
    - "Compensation Policy"
    - "Time Limit Policy"
    # Operational restrictions
    - "Weather Restriction"
    - "Seasonal Restriction"
    - "Aircraft Limitation"
    - "Route Restriction"
    - "Connection Impact"
    # Tagging and documentation
    - "Tag Requirement"
    - "Label Requirement"
    - "Documentation Requirement"
    - "Declaration Policy"
    # Service standards
    - "Handling Standard"
    - "Service Level Agreement"
    - "Priority Service Standard"
    - "Delivery Time Standard"
    # Exceptions
    - "Medical Exception"
    - "Military Exception"
    - "Diplomatic Exception"
    - "Event Exception"
    - "Emergency Exception"
  prompt: "prompts/claim_extraction.txt"
  description: "Extract baggage measurements, weight limits, and restrictions from airline documentation."
  max_gleanings: 2
  # NOTE(review): `source_filter` is likewise not a documented GraphRAG
  # setting -- confirm it is consumed.
  source_filter: "first_paragraphs"
# Local (entity-centred) search settings.
local_search:
  # The two proportions below sum to 1.0.
  text_unit_prop: 0.7
  community_prop: 0.3
  top_k_mapped_entities: 15
  top_k_relationships: 15
  max_tokens: 4000
  # NOTE(review): `source_priority` does not appear in the documented
  # GraphRAG local_search settings; presumably custom -- confirm.
  source_priority:
    graph_search: "first_paragraphs"
    answer_retrieval: "full_documents"
# Global (community-report map/reduce) search settings.
global_search:
  max_tokens: 4000
  data_max_tokens: 4000
  map_max_tokens: 1000
  reduce_max_tokens: 2000
  # NOTE(review): confirm the installed GraphRAG version reads
  # `allow_general_knowledge` and `min_score_threshold` from this section.
  allow_general_knowledge: false
  min_score_threshold: 0.1
  concurrency: 10
# Graph-embedding settings; the parameters are random-walk based
# (walk count, walk length, context window, iterations).
embed_graph:
  enabled: true
  num_walks: 100
  walk_length: 10
  window_size: 5
  iterations: 10
# UMAP layout settings; `n_components: 2` requests a 2-D projection.
# Nested here so `enabled` does not collide with other sections' `enabled`.
umap:
  enabled: true
  # NOTE(review): the documented GraphRAG umap section only lists
  # `enabled`; confirm the tuning keys below are actually consumed.
  n_neighbors: 15
  min_dist: 0.1
  n_components: 2
# Pipeline output artifacts are written to the local `output/` directory.
storage:
  type: file
  base_dir: "output"
# File-based cache kept in the local `cache/` directory.
cache:
  type: file
  base_dir: "cache"
# Run reports are written to the local `reports/` directory.
reporting:
  type: file
  base_dir: "reports"
  # NOTE(review): `include_source_tracking` does not appear in the
  # documented GraphRAG reporting settings; presumably custom -- confirm.
  include_source_tracking: true