跟踪建筑工程系统中的数据来源、转换和流向。对于审计跟踪、合规性和调试数据问题至关重要。
数据来源:ClawHub。 在 ClawSkills 查看
选择你使用的 Agent
方法一:命令行安装(推荐)
推荐(无需提前安装 clawhub)
npx clawhub@latest --dir ~/.claude/skills install data-lineage-tracker
或使用 clawhub CLI(需提前安装)
clawhub --dir ~/.claude/skills install data-lineage-tracker
⚠️ 需要 Node.js 18+,没有 Node?请使用下方方法二直接下载 ZIP。 安装 Node.js →
方法二:手动下载安装(无需 Node)
下载 ZIP,解压后将文件夹放到以下路径,重启 Agent 即可:
安装路径
~/.claude/skills/data-lineage-tracker/
💡 解压后将文件夹放到上方路径,重启 Agent 即可生效
---
name: "data-lineage-tracker"
description: "Track data origin, transformations, and flow through construction systems. Essential for audit trails, compliance, and debugging data issues."
homepage: "https://datadrivenconstruction.io"
metadata: {"openclaw": {"emoji": "✔️", "os": ["darwin", "linux", "win32"], "homepage": "https://datadrivenconstruction.io", "requires": {"bins": ["python3"]}}}
---
Track the origin, transformations, and flow of construction data through systems. Provides audit trails for compliance, helps debug data issues, and ensures data governance.
Construction projects require data accountability:
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Set
from datetime import datetime
from enum import Enum
import json
import hashlib
import uuid
class TransformationType(Enum):
    """Closed set of operation kinds a recorded transformation step can have.

    Each member's value is the lowercase string form of its name. Members are
    kept in their original order because Enum iteration follows definition
    order.
    """

    EXTRACT = "extract"
    TRANSFORM = "transform"
    LOAD = "load"

    AGGREGATE = "aggregate"
    JOIN = "join"
    FILTER = "filter"
    CALCULATE = "calculate"

    MANUAL_EDIT = "manual_edit"
    IMPORT = "import"
    EXPORT = "export"
@dataclass
class DataSource:
    """Describes one registered origin of data.

    All six fields are required and have no defaults; they are accepted
    positionally in the order declared below.
    """

    id: str               # key under which the tracker stores this source
    name: str
    system: str
    location: str
    owner: str
    created_at: datetime  # timestamp supplied by whoever builds the object
@dataclass
class TransformationStep:
    """One recorded operation that turned input entities into output entities.

    Every field except ``parameters`` is required; ``parameters`` defaults to
    a fresh empty dict per instance.
    """

    id: str
    transformation_type: TransformationType
    description: str

    # Entity ids consumed and produced by this step.
    input_entities: List[str]
    output_entities: List[str]

    logic: str             # SQL, Python, or a plain description
    performed_by: str      # user or system
    performed_at: datetime

    parameters: Dict[str, Any] = field(default_factory=dict)
@dataclass
class DataEntity:
    """A tracked piece of data that belongs to a data source.

    The first five fields are required. The remaining ones default to
    version 1, no checksum, and fresh empty containers per instance.
    """

    id: str
    name: str
    source_id: str         # id of the source this entity belongs to
    entity_type: str       # table, file, field, record
    created_at: datetime

    version: int = 1
    checksum: Optional[str] = None

    # default_factory gives each instance its own list / dict.
    parent_entities: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class LineageRecord:
    """Links one entity to the transformation that produced it.

    All fields are required and accepted positionally in declaration order.
    """

    id: str
    entity_id: str                   # entity this record describes
    transformation_id: str           # id of the producing transformation step
    upstream_entities: List[str]     # entity ids the transformation consumed
    downstream_entities: List[str]   # entity ids later derived from this entity
    recorded_at: datetime
class ConstructionDataLineageTracker:
"""Track data lineage for construction data flows."""
def __init__(self, project_id: str):
self.project_id = project_id
self.sources: Dict[str, DataSource] = {}
self.entities: Dict[str, DataEntity] = {}
self.transformations: Dict[str, TransformationStep] = {}
self.lineage_records: List[LineageRecord] = []
def register_source(self, name: str, system: str, location: str, owner: str) -> DataSource:
    """Build a data source with a generated id, store it, and return it.

    The id is ``SRC-`` followed by the first 8 hex characters of a random
    UUID; ``created_at`` is the current local time.
    """
    generated_id = f"SRC-{uuid.uuid4().hex[:8]}"
    registered = DataSource(
        generated_id,
        name,
        system,
        location,
        owner,
        datetime.now(),
    )
    self.sources[registered.id] = registered
    return registered
def register_entity(self, name: str, source_id: str, entity_type: str,
                    parent_entities: Optional[List[str]] = None,
                    metadata: Optional[Dict] = None) -> DataEntity:
    """Register a data entity (table, file, field) and return it.

    Args:
        name: Display name of the entity.
        source_id: Id of the data source the entity belongs to. It is stored
            as given; it is not checked against the registered sources.
        entity_type: Kind of entity, e.g. table, file, field, record.
        parent_entities: Ids of the entities this one derives from.
            ``None`` or an empty list means no parents.
        metadata: Free-form attributes. ``None`` or an empty dict means none.

    Returns:
        The new entity, also stored in ``self.entities`` under its generated
        ``ENT-<8 hex chars>`` id.
    """
    entity = DataEntity(
        id=f"ENT-{uuid.uuid4().hex[:8]}",
        name=name,
        source_id=source_id,
        entity_type=entity_type,
        created_at=datetime.now(),
        # Snapshot the containers so later changes to the caller's objects
        # cannot silently rewrite what was registered.
        parent_entities=list(parent_entities) if parent_entities else [],
        metadata=dict(metadata) if metadata else {},
    )
    self.entities[entity.id] = entity
    return entity
def calculate_checksum(self, data: Any) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of ``data``.

    Strings are hashed as they are. Any other value is first rendered as
    JSON with sorted keys, using ``str()`` for objects JSON cannot encode.
    """
    serialized = (
        data
        if isinstance(data, str)
        else json.dumps(data, sort_keys=True, default=str)
    )
    digest = hashlib.sha256(serialized.encode()).hexdigest()
    return digest[:16]
def record_transformation(self,
                          transformation_type: TransformationType,
                          description: str,
                          input_entities: List[str],
                          output_entities: List[str],
                          logic: str,
                          performed_by: str,
                          parameters: Optional[Dict] = None) -> TransformationStep:
    """Record a data transformation and the lineage it creates.

    Stores a transformation step under a generated ``TRF-<8 hex chars>`` id,
    then appends one lineage record per output entity. Each existing record
    that describes one of the inputs gets the output id added to its
    downstream list.

    Args:
        transformation_type: Kind of operation performed.
        description: Human-readable summary of the step.
        input_entities: Ids of the entities the step consumed.
        output_entities: Ids of the entities the step produced.
        logic: SQL, Python, or a plain description of what was done.
        performed_by: User or system that ran the step.
        parameters: Optional extra settings; ``None`` means none.

    Returns:
        The stored transformation step.
    """
    # Snapshot the caller's containers. Without this the step and every
    # lineage record share one list object, so a later mutation of any of
    # them would rewrite the whole recorded history.
    inputs = list(input_entities)
    outputs = list(output_entities)

    transformation = TransformationStep(
        id=f"TRF-{uuid.uuid4().hex[:8]}",
        transformation_type=transformation_type,
        description=description,
        input_entities=inputs,
        output_entities=outputs,
        logic=logic,
        performed_by=performed_by,
        performed_at=datetime.now(),
        parameters=dict(parameters) if parameters else {},
    )
    self.transformations[transformation.id] = transformation

    for output_id in outputs:
        self.lineage_records.append(
            LineageRecord(
                id=f"LIN-{uuid.uuid4().hex[:8]}",
                entity_id=output_id,
                transformation_id=transformation.id,
                upstream_entities=list(inputs),  # one private copy per record
                downstream_entities=[],
                recorded_at=datetime.now(),
            )
        )

        # Link this output onto every record that describes one of its inputs.
        for input_id in inputs:
            for existing_record in self.lineage_records:
                if existing_record.entity_id != input_id:
                    continue
                # Repeated input ids or re-recorded steps must not add the
                # same downstream id twice.
                if output_id not in existing_record.downstream_entities:
                    existing_record.downstream_entities.append(output_id)

    return transformation
def trace_upstream(self, entity_id: str, depth: int = None) -> List[Dict]:
"""Trace all upstream sources of an entity."""
visited = set()
lineage = []
def trace(eid: str, current_depth: int):
if eid in visited:
return
if depth is not None and current_depth > depth:
return
visited.add(eid)
entity = self.entities.get(eid)
if not entity:
return
# Find transformations that produced this entity
for record in self.lineage_records:
if record.entity_id == eid:
transformation = self.transformations.get(record.transformation_id)
if transformation:
lineage.append({
'entity': entity.name,
'entity_id': eid,
'depth': current_depth,
'transformation': transformation.description,
'transformation_type': transformation.transformation_type.value,
'performed_at': transformation.performed_at.isoformat(),
'performed_by': transformation.performed_by,
'upstream': record.upstream_entities
})
for upstream_id in record.upstream_entities:
trace(upstream_id, current_depth + 1)
trace(entity_id, 0)
return sorted(lineage, key=lambda x: x['depth'])
def trace_downstream(self, entity_id: str, depth: int = None) -> List[Dict]:
"""Trace all downstream dependencies of an entity."""
visited = set()
dependencies = []
...
安装 数据沿袭追踪器 后,可以对 AI 说这些话来触发它:
Help me get started with Data Lineage Tracker
Explains what Data Lineage Tracker does, walks through the setup, and runs a quick demo based on your current project
Use Data Lineage Tracker to track data origin, transformations, and flow through construction systems.
Invokes Data Lineage Tracker with the right parameters and returns the result directly in the conversation
What can I do with Data Lineage Tracker in my developer & devops workflow?
Lists the top use cases for Data Lineage Tracker, with example commands for each scenario
将技能文件夹放到 ~/.claude/skills/data-lineage-tracker/ 目录(个人级,所有项目可用),或 .claude/skills/data-lineage-tracker/(项目级)。重启 AI 客户端后,用 /data-lineage-tracker 主动调用,或让 AI 根据上下文自动发现并使用。
数据沿袭追踪器 支持 Claude、Cursor、OpenClaw,可与这些 AI 平台无缝集成,扩展其能力。
数据沿袭追踪器 可免费安装使用。请查阅仓库了解许可证信息。
跟踪建筑工程系统中的数据来源、转换和流向。对于审计跟踪、合规性和调试数据问题至关重要。
数据沿袭追踪器 属于「Developer & DevOps」分类,该分类的技能帮助 AI 智能体在此领域执行专业任务。
Automate my developer & devops tasks using Data Lineage Tracker
Identifies repetitive steps in your workflow and sets up Data Lineage Tracker to handle them automatically