Detect anomalies and outliers in construction data: unusual costs, schedule variances, productivity spikes. Statistical and ML-based detection methods.
数据来源:ClawHub。 在 ClawSkills 查看
选择你使用的 Agent
方法一:命令行安装(推荐)
推荐(无需提前安装 clawhub)
npx clawhub@latest --dir ~/.claude/skills install data-anomaly-detector
或使用 clawhub CLI(需提前安装)
clawhub --dir ~/.claude/skills install data-anomaly-detector
⚠️ 需要 Node.js 18+,没有 Node?请使用下方方法二直接下载 ZIP。 安装 Node.js →
方法二:手动下载安装(无需 Node)
下载 ZIP,解压后将文件夹放到以下路径,重启 Agent 即可:
安装路径
~/.claude/skills/data-anomaly-detector/
💡 解压后将文件夹放到上方路径,重启 Agent 即可生效。
---
name: "data-anomaly-detector"
description: "Detect anomalies and outliers in construction data: unusual costs, schedule variances, productivity spikes. Statistical and ML-based detection methods."
homepage: "https://datadrivenconstruction.io"
metadata: {"openclaw": {"emoji": "✔️", "os": ["darwin", "linux", "win32"], "homepage": "https://datadrivenconstruction.io", "requires": {"bins": ["python3"]}}}
---
Detect unusual patterns, outliers, and anomalies in construction data. Identify cost overruns, schedule delays, productivity issues, and data quality problems before they impact projects.
Construction data often contains anomalies that indicate:
Early detection prevents costly corrections and project delays.
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Tuple
from enum import Enum
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import stats
class AnomalyType(Enum):
    """Category of irregularity attached to a reported anomaly.

    Each member's value is the lowercase string form of its name.
    """

    OUTLIER = "outlier"
    PATTERN_BREAK = "pattern_break"
    MISSING_SEQUENCE = "missing_sequence"
    DUPLICATE = "duplicate"
    IMPOSSIBLE_VALUE = "impossible_value"
    TREND_DEVIATION = "trend_deviation"
class AnomalySeverity(Enum):
    """Severity label attached to a reported anomaly.

    Members are declared from most to least severe. The values are plain
    strings, so members do not support ordering comparisons such as ``<``.
    """

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
@dataclass
class Anomaly:
    """A single anomaly found in a dataset.

    ``id``, ``anomaly_type``, ``severity``, ``field`` and ``value`` are
    required; every other attribute has a neutral default.
    """

    # Identifier string chosen by the detector that created the record.
    id: str
    anomaly_type: AnomalyType
    severity: AnomalySeverity
    # Name of the column the anomaly was found in.
    # NOTE: this attribute shares its name with ``dataclasses.field``. It is
    # annotation-only, so it does not rebind that helper inside the class
    # body, but giving it a default value here would.
    field: str
    # The offending value, stored as found.
    value: Any
    # (low, high) bounds the value was expected to fall in. Either bound may
    # be None for an open-ended range; the negative-cost check passes (0, None).
    expected_range: Optional[Tuple[Optional[float], Optional[float]]] = None
    description: str = ""
    # Index label of the offending row. Annotated as int, but detectors pass
    # the DataFrame index label through unchanged, so it may be another type.
    row_index: Optional[int] = None
    detection_method: str = ""
    # Detectors in this file supply values between 0.0 and 1.0.
    confidence: float = 0.0
    suggested_action: str = ""
@dataclass
class AnomalyReport:
    """Result of one detection run over a data source."""

    # Label for the data that was scanned.
    source: str
    # Timestamp recorded for the run.
    detected_at: datetime
    # Number of records that were examined.
    total_records: int
    # Every anomaly found during the run.
    anomalies: List[Anomaly]
    # String-keyed counts describing the run.
    # NOTE(review): the keys are not established by the visible code;
    # presumably counts per severity or per type, confirm with the producer.
    summary: Dict[str, int]
class ConstructionAnomalyDetector:
    """Detect anomalies in construction data."""

    # Construction-specific thresholds.
    # Each entry is a (low, high) pair for one cost metric.
    # NOTE(review): currency and units are implied only by the key names
    # (per cubic yard, per ton, per hour, percent); confirm before relying
    # on them.
    COST_THRESHOLDS = {
        'concrete_per_cy': (200, 800),
        'steel_per_ton': (1500, 4000),
        'labor_per_hour': (25, 150),
        'overhead_percentage': (5, 25),
        'contingency_percentage': (3, 20),
    }

    # Limits applied to schedule data.
    SCHEDULE_THRESHOLDS = {
        'max_activity_duration': 365,  # days
        'max_lag': 30,  # days
        'min_productivity': 0.1,
        'max_productivity': 10.0,
    }

    def __init__(self):
        """Create a detector with no stored anomalies and no report history."""
        self.anomalies: List[Anomaly] = []
        self.detection_history: List[AnomalyReport] = []
def detect_cost_anomalies(self, df: pd.DataFrame, cost_column: str,
                          group_by: Optional[str] = None) -> List[Anomaly]:
    """Detect anomalies in cost data.

    Three independent checks are run, so a single row can be reported by
    more than one of them:

    1. IQR outliers: values outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``.
       Severity is HIGH when the value is more than ``3*IQR`` from the
       median, otherwise MEDIUM.
    2. Negative values, always reported as CRITICAL impossible values.
    3. Per-group z-scores (only when ``group_by`` names an existing
       column): rows more than 3 standard deviations from their group
       mean.

    Args:
        df: Frame holding the cost data. It is not modified.
        cost_column: Name of the numeric column to inspect.
        group_by: Optional column whose values define the groups for the
            z-score check. Ignored when falsy or missing from ``df``.

    Returns:
        The anomalies found, in check order (IQR, negative, group). The
        list is returned only; it is not stored on ``self``.
    """
    anomalies: List[Anomaly] = []
    costs = df[cost_column]

    # Statistical outlier detection (IQR method)
    q1 = costs.quantile(0.25)
    q3 = costs.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    # Loop-invariant, so computed once instead of once per outlier.
    median = costs.median()

    is_outlier = (costs < lower_bound) | (costs > upper_bound)
    # Series.items() keeps the column's own dtype; iterrows() would coerce
    # every row to a common dtype across all columns.
    for idx, value in costs[is_outlier].items():
        if abs(value - median) > 3 * iqr:
            severity = AnomalySeverity.HIGH
        else:
            severity = AnomalySeverity.MEDIUM

        anomalies.append(Anomaly(
            id=f"COST-{idx}",
            anomaly_type=AnomalyType.OUTLIER,
            severity=severity,
            field=cost_column,
            value=value,
            expected_range=(lower_bound, upper_bound),
            description=f"Cost value {value:,.2f} outside expected range",
            row_index=idx,
            detection_method="IQR",
            confidence=0.95,
            suggested_action="Review cost estimate for errors"
        ))

    # Negative cost check
    for idx, value in costs[costs < 0].items():
        anomalies.append(Anomaly(
            id=f"COST-NEG-{idx}",
            anomaly_type=AnomalyType.IMPOSSIBLE_VALUE,
            severity=AnomalySeverity.CRITICAL,
            field=cost_column,
            value=value,
            expected_range=(0, None),  # no upper bound
            description="Negative cost value detected",
            row_index=idx,
            detection_method="Business Rule",
            confidence=1.0,
            suggested_action="Correct data entry error or investigate credit"
        ))

    # Group-based anomalies (if grouped)
    if group_by and group_by in df.columns:
        # Iterating the groupby yields each group's values directly, in
        # sorted key order, without re-filtering the frame per group.
        for group_name, group_costs in costs.groupby(df[group_by]):
            spread = group_costs.std()
            # std is NaN for a single-row group and 0 for a constant group;
            # neither gives a meaningful z-score, so skip them.
            if not spread > 0:
                continue

            z_scores = ((group_costs - group_costs.mean()) / spread).abs()
            # Pair each value with its z-score positionally so repeated
            # index labels cannot turn the value lookup into a Series.
            for (idx, value), z in zip(group_costs.items(), z_scores):
                if z > 3:
                    anomalies.append(Anomaly(
                        id=f"COST-GROUP-{idx}",
                        anomaly_type=AnomalyType.OUTLIER,
                        severity=AnomalySeverity.MEDIUM,
                        field=cost_column,
                        value=value,
                        description=f"Unusual cost for group {group_name} (z-score: {z:.2f})",
                        row_index=idx,
                        detection_method="Z-Score by Group",
                        confidence=min(z / 5, 1.0)
                    ))

    return anomalies
def detect_schedule_anomalies(self, df: pd.DataFrame) -> List[Anomaly]:
"""Detect anomalies in schedule data."""
anomalies = []
# Check for required columns
required = ['start_date', 'end_date']
if not all(col in df.columns for col in required):
return anomalies
# Convert dates
df['start_date'] = pd.to_datetime(df['start_date'])
df['end_date'] = pd.to_datetime(df['end_date'])
# Calculate duration
df['duration'] = (df['end_date'] - df['start_date']).dt.days
# Negative duration (end before start)
negative_duration = df[df['duration'] < 0]
for idx, row in negative_duration.iterrows():
anomalies.append(Anomaly(
id=f"SCHED-NEG-{idx}",
anomaly_type=AnomalyType.IMPOSSIBLE_VALUE,
severity=AnomalySeverity.CRITICAL,
field="duration",
value=row['duration'],
description="End date before start date",
row_index=idx,
detection_method="Business Rule",
confidence=1.0,
suggested_action="Correct dates"
))
# Extremely long durations
long_tasks = df[df['duration'] > self.SCHEDULE_THRESHOLDS['max_activity_duration']]
for idx, row in long_tasks.iterrows():
anomalies.append(Anomaly(
id=f"SCHED-LONG-{idx}",
anomaly_type=AnomalyType.OUTLIER,
severity=AnomalySeverity.MEDIUM,
field="duration",
value=row['duration'],
expected_range=(0, self.SCHEDULE_THRESHOLDS['max_activity_duration']),
description=f"Task duration {row['duration']} days exceeds threshold",
row_index=idx,
detection_method="Threshold",
confidence=0.9,
suggested_action="Review if task should be broken down"
))
...
安装 Data Anomaly Detector 后,可以对 AI 说这些话来触发它:
Help me get started with Data Anomaly Detector
Explains what Data Anomaly Detector does, walks through the setup, and runs a quick demo based on your current project
Use Data Anomaly Detector to detect anomalies and outliers in construction data: unusual costs, ...
Invokes Data Anomaly Detector with the right parameters and returns the result directly in the conversation
What can I do with Data Anomaly Detector in my data & analytics workflow?
Lists the top use cases for Data Anomaly Detector, with example commands for each scenario
将技能文件夹放到 ~/.claude/skills/data-anomaly-detector/ 目录(个人级,所有项目可用),或 .claude/skills/data-anomaly-detector/(项目级)。重启 AI 客户端后,用 /data-anomaly-detector 主动调用,或让 AI 根据上下文自动发现并使用。
Data Anomaly Detector 支持 Claude、Cursor、OpenClaw,可与这些 AI 平台无缝集成,扩展其能力。
Data Anomaly Detector 可免费安装使用。请查阅仓库了解许可证信息。
Detect anomalies and outliers in construction data: unusual costs, schedule variances, productivity spikes. Statistical and ML-based detection methods.
Data Anomaly Detector 属于「Data & Analytics」分类,该分类的技能帮助 AI 智能体在此领域执行专业任务。
Automate my data & analytics tasks using Data Anomaly Detector
Identifies repetitive steps in your workflow and sets up Data Anomaly Detector to handle them automatically