Build RAG systems for construction knowledge bases. Create searchable AI-powered construction document systems
数据来源:ClawHub。 在 ClawSkills 查看
选择你使用的 Agent
方法一:命令行安装(推荐)
推荐(无需提前安装 clawhub)
npx clawhub@latest --dir ~/.claude/skills install rag-construction
或使用 clawhub CLI(需提前安装)
clawhub --dir ~/.claude/skills install rag-construction
⚠️ 需要 Node.js 18+,没有 Node?请使用下方方法二直接下载 ZIP。 安装 Node.js →
方法二:手动下载安装(无需 Node)
下载 ZIP,解压后将文件夹放到以下路径,重启 Agent 即可:
安装路径
~/.claude/skills/rag-construction/
💡 解压后将文件夹放到上方路径,重启 Agent 即可生效
---
name: "rag-construction"
description: "Build RAG systems for construction knowledge bases. Create searchable AI-powered construction document systems"
homepage: "https://datadrivenconstruction.io"
metadata: {"openclaw": {"emoji": "🐼", "os": ["darwin", "linux", "win32"], "homepage": "https://datadrivenconstruction.io", "requires": {"bins": ["python3"]}}}
---
Based on DDC methodology (Chapter 2.3), this skill builds Retrieval-Augmented Generation (RAG) systems for construction knowledge bases, enabling semantic search and AI-powered question answering over construction documents.
Book Reference: "Pandas DataFrame и LLM ChatGPT" / "Pandas DataFrame and LLM ChatGPT"
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Any, Callable
from datetime import datetime
import json
import hashlib
import re
class DocumentType(Enum):
    """Categories of construction documents handled by the knowledge base.

    Each member's value is the lowercase string stored in chunk metadata
    under the ``"doc_type"`` key.
    """

    SPECIFICATION = "specification"
    DRAWING = "drawing"
    CONTRACT = "contract"
    RFI = "rfi"
    SUBMITTAL = "submittal"
    CHANGE_ORDER = "change_order"
    MEETING_MINUTES = "meeting_minutes"
    DAILY_REPORT = "daily_report"
    SAFETY_REPORT = "safety_report"
    INSPECTION = "inspection"
    MANUAL = "manual"
    STANDARD = "standard"
class ChunkingStrategy(Enum):
    """Strategies a chunker can use to split document text."""

    FIXED_SIZE = "fixed_size"
    PARAGRAPH = "paragraph"
    SECTION = "section"
    SEMANTIC = "semantic"
    SENTENCE = "sentence"
@dataclass
class DocumentChunk:
    """A single piece of a document's text, ready to be embedded."""

    # Identifier of this chunk.
    id: str
    # Identifier of the document the chunk was cut from.
    document_id: str
    # The chunk text itself.
    content: str
    # Free-form metadata carried with the chunk.
    metadata: Dict[str, Any]
    # Embedding vector; None until one has been assigned.
    embedding: Optional[List[float]] = None
    # Size of the chunk in tokens (defaults to 0).
    token_count: int = 0
    # Ordinal position of the chunk (defaults to 0).
    position: int = 0
@dataclass
class Document:
    """A construction document together with its chunks."""

    # Identifier of the document.
    id: str
    # Human-readable title.
    title: str
    # Category of the document.
    doc_type: DocumentType
    # Full text of the document.
    content: str
    # Where the document came from.
    source: str
    # Extra metadata; each instance gets its own fresh dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Chunks produced from ``content``; each instance gets its own fresh list.
    chunks: List[DocumentChunk] = field(default_factory=list)
    # Timestamp taken with datetime.now() when the instance is created.
    created_at: datetime = field(default_factory=datetime.now)
@dataclass
class SearchResult:
    """One hit returned by a vector-store search."""

    # The matching chunk.
    chunk: DocumentChunk
    # Score assigned to the match.
    score: float
    # Title of the document the chunk belongs to.
    document_title: str
    # Category of the document the chunk belongs to.
    doc_type: DocumentType
@dataclass
class RAGResponse:
    """Answer produced by the RAG system for a single query."""

    # The question that was asked.
    query: str
    # The generated answer text.
    answer: str
    # Search results the answer is based on.
    sources: List[SearchResult]
    # Confidence value attached to the answer.
    confidence: float
    # Number of tokens consumed while producing the answer.
    tokens_used: int
class TextChunker:
"""Split documents into chunks for embedding"""
def __init__(
    self,
    strategy: ChunkingStrategy = ChunkingStrategy.PARAGRAPH,
    chunk_size: int = 500,
    chunk_overlap: int = 50,
):
    """Store the chunking configuration.

    Args:
        strategy: Which splitting strategy ``chunk_document`` dispatches to.
        chunk_size: Target chunk length, measured in characters by the
            fixed-size and paragraph chunkers.
        chunk_overlap: Number of characters consecutive fixed-size chunks
            share.
    """
    self.strategy = strategy
    self.chunk_size = chunk_size
    self.chunk_overlap = chunk_overlap
def chunk_document(self, document: Document) -> List[DocumentChunk]:
    """Split ``document`` into chunks using the configured strategy.

    Args:
        document: The document whose ``content`` is to be split.

    Returns:
        The chunks produced by the strategy-specific chunker.
    """
    strategy = self.strategy
    if strategy == ChunkingStrategy.PARAGRAPH:
        return self._chunk_by_paragraph(document)
    if strategy == ChunkingStrategy.SECTION:
        return self._chunk_by_section(document)
    if strategy == ChunkingStrategy.SENTENCE:
        return self._chunk_by_sentence(document)
    # FIXED_SIZE, and any strategy without a dedicated branch here
    # (e.g. SEMANTIC), falls back to fixed-size chunking.
    return self._chunk_fixed_size(document)
def _chunk_fixed_size(self, document: Document) -> List[DocumentChunk]:
    """Chunk by fixed character size with overlap.

    Windows of ``self.chunk_size`` characters are cut from the document
    text. A window that ends mid-text is shortened back to the nearest
    whitespace so words are not split; if the window holds no whitespace
    at all (one very long token) it is cut hard at ``chunk_size``.
    Consecutive windows share ``self.chunk_overlap`` characters, but the
    scan always moves forward, so it terminates for any configuration.

    Args:
        document: The document whose ``content`` is to be split.

    Returns:
        Non-empty, whitespace-stripped chunks in document order. Each
        chunk's metadata holds ``doc_type`` and ``title`` plus the
        document's own metadata (which wins on key clashes), and
        ``token_count`` is the whitespace-separated word count.
    """
    chunks = []
    text = document.content
    text_len = len(text)
    # A non-positive size could never advance; treat it as one character.
    size = max(1, self.chunk_size)
    start = 0
    position = 0

    while start < text_len:
        end = min(start + size, text_len)

        if end < text_len:
            # Back up to the nearest whitespace so a word is not split.
            boundary = end
            while boundary > start and text[boundary] not in ' \n\t':
                boundary -= 1
            # No whitespace in the window: keep the hard cut instead of
            # collapsing to an empty chunk (which used to loop forever).
            if boundary > start:
                end = boundary

        chunk_text = text[start:end].strip()
        if chunk_text:
            chunk_id = self._generate_chunk_id(document.id, position)
            chunks.append(DocumentChunk(
                id=chunk_id,
                document_id=document.id,
                content=chunk_text,
                metadata={
                    "doc_type": document.doc_type.value,
                    "title": document.title,
                    **document.metadata
                },
                token_count=len(chunk_text.split()),
                position=position
            ))
            position += 1

        # The end of the text has been emitted; restarting inside the
        # overlap would only repeat content already in the last chunk.
        if end >= text_len:
            break

        # Step back by the overlap, but never to or before the current
        # start, otherwise the scan would stall or run backwards.
        next_start = end - self.chunk_overlap
        start = next_start if next_start > start else end

    return chunks
def _chunk_by_paragraph(self, document: Document) -> List[DocumentChunk]:
    """Chunk by paragraphs.

    The text is split on blank lines (``"\\n\\n"``). Stripped, non-empty
    paragraphs are accumulated into one chunk while the combined length
    stays below ``self.chunk_size``; the joining separator is not counted
    in that check. A paragraph that does not fit starts a new chunk, so a
    single paragraph longer than ``chunk_size`` becomes one oversized
    chunk of its own.

    Args:
        document: The document whose ``content`` is to be split.

    Returns:
        Chunks in document order, with positions numbered from 0. Each
        chunk's metadata holds ``doc_type`` and ``title`` plus the
        document's own metadata (which wins on key clashes).
    """
    chunks = []

    def emit(text: str) -> None:
        # Positions are consecutive, so the next one is the chunk count.
        position = len(chunks)
        chunk_id = self._generate_chunk_id(document.id, position)
        chunks.append(DocumentChunk(
            id=chunk_id,
            document_id=document.id,
            content=text,
            metadata={
                "doc_type": document.doc_type.value,
                "title": document.title,
                **document.metadata
            },
            token_count=len(text.split()),
            position=position
        ))

    current_chunk = ""
    for para in document.content.split('\n\n'):
        para = para.strip()
        if not para:
            continue

        if len(current_chunk) + len(para) < self.chunk_size:
            # Still room: append to the chunk being built.
            current_chunk = current_chunk + "\n\n" + para if current_chunk else para
            continue

        # Paragraph does not fit: close the current chunk and start anew.
        if current_chunk:
            emit(current_chunk)
        current_chunk = para

    # Add remaining content
    if current_chunk:
        emit(current_chunk)

    return chunks
def _chunk_by_section(self, document: Document) -> List[DocumentChunk]:
"""Chunk by document sections (headers)"""
# Split by common section patterns
section_pattern = r'\n(?=(?:\d+\.|\d+\s|SECTION|ARTICLE|PART)\s+[A-Z])'
sections = re.split(section_pattern, document.content)
chunks = []
for position, section in enumerate(sections):
section = section.strip()
if section:
# If section is too large, further split it
if len(section) > self.chunk_size * 2:
sub_chunker = TextChunker(ChunkingStrategy.PARAGRAPH, self.chunk_size)
sub_doc = Document(
id=f"{document.id}_sec{position}",
title=document.title,
doc_type=document.doc_type,
content=section,
source=document.source,
metadata=document.metadata
)
sub_chunks = sub_chunker.chunk_document(sub_doc)
for i, chunk in enumerate(sub_chunks):
chunk.id = self._generate_chunk_id(document.id, position * 100 + i)
...
安装 Rag Construction 后,可以对 AI 说这些话来触发它
Help me get started with Rag Construction
Explains what Rag Construction does, walks through the setup, and runs a quick demo based on your current project
Use Rag Construction to build RAG systems for construction knowledge bases
Invokes Rag Construction with the right parameters and returns the result directly in the conversation
What can I do with Rag Construction in my documents & notes workflow?
Lists the top use cases for Rag Construction, with example commands for each scenario
将技能文件夹放到 ~/.claude/skills/rag-construction/ 目录(个人级,所有项目可用),或 .claude/skills/rag-construction/(项目级)。重启 AI 客户端后,用 /rag-construction 主动调用,或让 AI 根据上下文自动发现并使用。
Rag Construction 支持 Claude、Cursor、OpenClaw,可与这些 AI 平台无缝集成,扩展其能力。
Rag Construction 可免费安装使用。请查阅仓库了解许可证信息。
Build RAG systems for construction knowledge bases. Create searchable AI-powered construction document systems
Rag Construction 属于「Documents & Notes」分类,该分类的技能帮助 AI 智能体在此领域执行专业任务。
Automate my documents & notes tasks using Rag Construction
Identifies repetitive steps in your workflow and sets up Rag Construction to handle them automatically