Handle CSV files from construction software exports. Auto-detect delimiters, encodings, and clean messy data.
数据来源:ClawHub。 在 ClawSkills 查看
选择你使用的 Agent
方法一:命令行安装(推荐)
推荐(无需提前安装 clawhub)
npx clawhub@latest --dir ~/.claude/skills install csv-handler
或使用 clawhub CLI(需提前安装)
clawhub --dir ~/.claude/skills install csv-handler
⚠️ 需要 Node.js 18+,没有 Node?请使用下方方法二直接下载 ZIP。 安装 Node.js →
方法二:手动下载安装(无需 Node)
下载 ZIP,解压后将文件夹放到以下路径,重启 Agent 即可:
安装路径
~/.claude/skills/csv-handler/
💡 解压后将文件夹放到上方路径,重启 Agent 即可生效
---
name: "csv-handler"
description: "Handle CSV files from construction software exports. Auto-detect delimiters, encodings, and clean messy data."
homepage: "https://datadrivenconstruction.io"
metadata: {"openclaw": {"emoji": "🏷️", "os": ["darwin", "linux", "win32"], "homepage": "https://datadrivenconstruction.io", "requires": {"bins": ["python3"]}}}
---
CSV is the universal exchange format in construction - from scheduling exports to cost databases. This skill handles encoding issues, delimiter detection, and data cleaning.
import pandas as pd
import csv
from typing import Dict, Any, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass
import chardet
@dataclass
class CSVProfile:
    """Summary of the format and shape detected for one CSV file."""

    # Text encoding used to decode the file.
    encoding: str
    # Field separator character.
    delimiter: str
    # Whether the first line was judged to be a header row.
    has_header: bool
    # Number of data rows (header excluded when present).
    row_count: int
    # Number of columns found in the sampled rows.
    column_count: int
    # Column labels as read from the sample.
    columns: List[str]
class ConstructionCSVHandler:
    """Read, profile, clean, merge, split and export construction CSV files.

    Encoding and delimiter are auto-detected unless the caller supplies them.
    """

    COMMON_DELIMITERS = [',', ';', '\t', '|']
    COMMON_ENCODINGS = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']

    # Characters that are invalid or act as path separators in file names
    # on at least one of the common operating systems.
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'

    def __init__(self):
        self.last_profile: Optional["CSVProfile"] = None

    def detect_encoding(self, file_path: str) -> str:
        """Detect the file encoding from its first 10000 bytes.

        Returns:
            The encoding name reported by chardet, or ``'utf-8'`` when
            chardet reports nothing.
        """
        with open(file_path, 'rb') as f:
            raw = f.read(10000)
        result = chardet.detect(raw)
        encoding = result.get('encoding') or 'utf-8'
        # A pure-ASCII sample says nothing about the rest of the file;
        # UTF-8 is a superset of ASCII, so it is the safer answer.
        if encoding.lower() == 'ascii':
            return 'utf-8'
        return encoding

    @staticmethod
    def _count_unquoted(line: str, delimiter: str) -> int:
        """Count occurrences of ``delimiter`` in ``line`` outside double quotes."""
        count = 0
        in_quotes = False
        for ch in line:
            if ch == '"':
                in_quotes = not in_quotes
            elif ch == delimiter and not in_quotes:
                count += 1
        return count

    def detect_delimiter(self, file_path: str, encoding: str) -> str:
        """Detect the CSV delimiter from the first 5000 characters.

        Each candidate in ``COMMON_DELIMITERS`` is counted per line, ignoring
        occurrences inside double-quoted fields. The candidate whose most
        frequent non-zero per-line count is shared by the most lines wins;
        ties go to the higher count, then to the earlier candidate.

        Returns:
            The detected delimiter, or ``','`` when no candidate appears.
        """
        with open(file_path, 'r', encoding=encoding, errors='replace') as f:
            sample = f.read(5000)

        lines = [line for line in sample.splitlines() if line.strip()]
        # A full-size sample most likely ends mid-line; drop the partial line.
        if len(sample) >= 5000 and len(lines) > 1:
            lines.pop()

        best, best_score = ',', (0, 0)
        for delim in self.COMMON_DELIMITERS:
            counts = [self._count_unquoted(line, delim) for line in lines]
            nonzero = [c for c in counts if c]
            if not nonzero:
                continue
            modal = max(sorted(set(nonzero)), key=nonzero.count)
            score = (nonzero.count(modal), modal)
            if score > best_score:
                best, best_score = delim, score
        return best

    def profile_csv(self, file_path: str) -> "CSVProfile":
        """Profile a CSV file and remember the result in ``last_profile``.

        The header check is a heuristic: the file is considered to have a
        header unless the first column label consists only of digits, dots
        and dashes. Column labels always come from the first parsed line.
        """
        encoding = self.detect_encoding(file_path)
        delimiter = self.detect_delimiter(file_path, encoding)

        # Read a small sample
        df = pd.read_csv(file_path, encoding=encoding, delimiter=delimiter,
                         nrows=10, on_bad_lines='skip')

        # str() guards against non-string column labels.
        first_label = str(df.columns[0]) if len(df.columns) else ''
        has_header = not first_label.replace('.', '').replace('-', '').isdigit()

        # Full row count. Blank lines are not counted because pandas skips
        # them by default when reading.
        with open(file_path, 'r', encoding=encoding, errors='replace') as f:
            line_count = sum(1 for line in f if line.strip())
        row_count = max(line_count - (1 if has_header else 0), 0)

        profile = CSVProfile(
            encoding=encoding,
            delimiter=delimiter,
            has_header=has_header,
            row_count=row_count,
            column_count=len(df.columns),
            columns=list(df.columns)
        )
        self.last_profile = profile
        return profile

    def read_csv(self, file_path: str,
                 encoding: Optional[str] = None,
                 delimiter: Optional[str] = None,
                 clean: bool = True) -> pd.DataFrame:
        """Read a CSV file, auto-detecting encoding and delimiter if omitted.

        When the encoding was auto-detected and turns out to be wrong, the
        encodings in ``COMMON_ENCODINGS`` are tried in order. An explicitly
        passed encoding is never substituted.

        Args:
            file_path: Path of the CSV file.
            encoding: Text encoding; detected when ``None``.
            delimiter: Field separator; detected when ``None``.
            clean: Run ``clean_dataframe`` on the result.

        Raises:
            UnicodeDecodeError: The file cannot be decoded with the
                explicitly given encoding.
        """
        auto_encoding = encoding is None
        if auto_encoding:
            encoding = self.detect_encoding(file_path)
        if delimiter is None:
            delimiter = self.detect_delimiter(file_path, encoding)

        candidates = [encoding]
        if auto_encoding:
            candidates += [e for e in self.COMMON_ENCODINGS
                           if e.lower() != encoding.lower()]

        last_error = None
        for candidate in candidates:
            try:
                df = pd.read_csv(
                    file_path,
                    encoding=candidate,
                    delimiter=delimiter,
                    on_bad_lines='skip',
                    low_memory=False
                )
                break
            except (UnicodeDecodeError, LookupError) as exc:
                # Wrong or unknown encoding: try the next candidate.
                last_error = exc
        else:
            raise last_error

        if clean:
            df = self.clean_dataframe(df)
        return df

    def clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a cleaned copy of ``df``; the input frame is not modified.

        Drops fully empty rows and columns, normalizes column names and
        strips surrounding whitespace from string values. Non-string values
        in mixed-type columns are kept unchanged.
        """
        # Remove empty rows and columns (each dropna returns a new frame)
        cleaned = df.dropna(how='all').dropna(axis=1, how='all')

        # Clean column names
        cleaned.columns = [self._clean_column_name(c) for c in cleaned.columns]

        # Strip whitespace from strings
        for col in cleaned.columns:
            series = cleaned[col]
            if not isinstance(series, pd.Series):
                # Duplicate column names yield a DataFrame; leave as-is.
                continue
            if pd.api.types.is_object_dtype(series.dtype):
                # .str.strip() would turn non-string values into NaN.
                cleaned[col] = series.map(
                    lambda v: v.strip() if isinstance(v, str) else v)
            elif pd.api.types.is_string_dtype(series.dtype):
                cleaned[col] = series.str.strip()
        return cleaned

    def _clean_column_name(self, name: str) -> str:
        """Normalize a column name to lowercase with underscores.

        Spaces and dashes become underscores; every other character that is
        not alphanumeric or an underscore is removed. Non-string names are
        returned as ``str(name)``.
        """
        if not isinstance(name, str):
            return str(name)
        clean = name.strip().lower()
        clean = clean.replace(' ', '_').replace('-', '_')
        return ''.join(c for c in clean if c.isalnum() or c == '_')

    def merge_csvs(self, file_paths: List[str],
                   on_column: Optional[str] = None) -> pd.DataFrame:
        """Merge multiple CSV files into one DataFrame.

        Every file is read with ``read_csv`` and tagged with a
        ``_source_file`` column. If ``on_column`` exists in the first file
        the frames are outer-joined on it; otherwise they are concatenated.
        An empty ``file_paths`` yields an empty DataFrame.
        """
        dfs = []
        for path in file_paths:
            df = self.read_csv(path)
            df['_source_file'] = Path(path).name
            dfs.append(df)

        if not dfs:
            return pd.DataFrame()

        if on_column and on_column in dfs[0].columns:
            # NOTE: overlapping non-key columns (including _source_file) get
            # pandas' default _x/_y suffixes, which can collide when many
            # files share column names.
            result = dfs[0]
            for df in dfs[1:]:
                result = pd.merge(result, df, on=on_column, how='outer')
            return result

        return pd.concat(dfs, ignore_index=True)

    def _safe_filename(self, name: str) -> str:
        """Replace path separators and other unsafe characters with ``_``."""
        return ''.join(
            '_' if (ch in self._UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
            for ch in name
        )

    def split_csv(self, df: pd.DataFrame,
                  group_column: str,
                  output_dir: str) -> List[str]:
        """Write one CSV per distinct value of ``group_column``.

        Files are named ``{group_column}_{value}.csv`` inside ``output_dir``
        (created if missing). Unsafe file-name characters are replaced with
        ``_``, and a numeric suffix is added if two values map to the same
        name. Rows with a missing group value are written to their own file.

        Returns:
            Paths of the written files, in order of first appearance.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        column = df[group_column]
        used_names = set()
        files = []
        for value in column.unique():
            # NaN never compares equal to itself, so select it via isna().
            mask = column.isna() if pd.isna(value) else column == value
            subset = df[mask]

            base = self._safe_filename(f"{group_column}_{value}")
            stem, n = base, 1
            while stem in used_names:
                stem = f"{base}_{n}"
                n += 1
            used_names.add(stem)

            filepath = output_path / f"{stem}.csv"
            subset.to_csv(filepath, index=False)
            files.append(str(filepath))
        return files

    def convert_types(self, df: pd.DataFrame,
                      type_map: Optional[Dict[str, str]] = None) -> pd.DataFrame:
        """Return a copy of ``df`` with converted column types.

        With a non-empty ``type_map`` (column name -> dtype), each listed
        column that exists is cast with ``astype``; failed casts leave the
        column unchanged. Otherwise each text/object column is converted to
        numeric if possible, else to datetime if possible, else left as-is.
        Columns that already have a non-text dtype are not touched.
        """
        df = df.copy()
        if type_map:
            for col, dtype in type_map.items():
                if col not in df.columns:
                    continue
                try:
                    df[col] = df[col].astype(dtype)
                except (ValueError, TypeError):
                    # Best effort: keep the original values.
                    pass
            return df

        # Auto-convert
        for col in df.columns:
            series = df[col]
            if not isinstance(series, pd.Series):
                # Duplicate column names yield a DataFrame; skip.
                continue
            # Only text-like columns are candidates. In particular,
            # pd.to_numeric would turn datetime columns into integers.
            if not (pd.api.types.is_object_dtype(series.dtype)
                    or pd.api.types.is_string_dtype(series.dtype)):
                continue
            # Try numeric
            try:
                df[col] = pd.to_numeric(series)
                continue
            except (ValueError, TypeError):
                pass
            # Try datetime
            try:
                df[col] = pd.to_datetime(series)
            except (ValueError, TypeError, OverflowError):
                pass
        return df

    def export_csv(self, df: pd.DataFrame,
                   file_path: str,
                   encoding: str = 'utf-8-sig',
                   delimiter: str = ',') -> str:
        """Write ``df`` to ``file_path`` without the index and return the path."""
        df.to_csv(file_path, encoding=encoding, sep=delimiter, index=False)
        return file_path
# Specialized handlers
class ScheduleCSVHandler(ConstructionCSVHandler):
    """Handler for project schedule CSVs."""

    SCHEDULE_COLUMNS = ['task_id', 'task_name', 'start_date', 'end_date',
                        'duration', 'predecessors', 'resources']

    # Substrings of a column name that mark it as a date candidate.
    _DATE_NAME_HINTS = ('date', 'start', 'end')

    def parse_schedule(self, file_path: str) -> pd.DataFrame:
        """Read a schedule CSV and parse its date-like columns.

        A column is a date candidate when its lowercased name contains
        ``date``, ``start`` or ``end``. Candidates that fail to parse are
        left unchanged.
        """
        df = self.read_csv(file_path)

        # Convert date columns
        for col in df.columns:
            name = str(col).lower()
            if not any(hint in name for hint in self._DATE_NAME_HINTS):
                continue
            series = df[col]
            # Substring matching also hits names like 'spend'; numeric
            # columns are skipped because pd.to_datetime would read their
            # values as nanoseconds since the epoch.
            if (not isinstance(series, pd.Series)
                    or pd.api.types.is_numeric_dtype(series.dtype)):
                continue
            try:
                df[col] = pd.to_datetime(series)
            except (ValueError, TypeError, OverflowError):
                # Best effort: keep the original values.
                pass
        return df
class CostCSVHandler(ConstructionCSVHandler):
...安装 Csv Handler 后,可以对 AI 说这些话来触发它
Help me get started with Csv Handler
Explains what Csv Handler does, walks through the setup, and runs a quick demo based on your current project
Use Csv Handler to handle CSV files from construction software exports
Invokes Csv Handler with the right parameters and returns the result directly in the conversation
What can I do with Csv Handler in my data & analytics workflow?
Lists the top use cases for Csv Handler, with example commands for each scenario
将技能文件夹放到 ~/.claude/skills/csv-handler/ 目录(个人级,所有项目可用),或 .claude/skills/csv-handler/(项目级)。重启 AI 客户端后,用 /csv-handler 主动调用,或让 AI 根据上下文自动发现并使用。
Csv Handler 支持 Claude、Cursor、OpenClaw,可与这些 AI 平台无缝集成,扩展其能力。
Csv Handler 可免费安装使用。请查阅仓库了解许可证信息。
Handle CSV files from construction software exports. Auto-detect delimiters, encodings, and clean messy data.
Csv Handler 属于「Data & Analytics」分类,该分类的技能帮助 AI 智能体在此领域执行专业任务。
Automate my data & analytics tasks using Csv Handler
Identifies repetitive steps in your workflow and sets up Csv Handler to handle them automatically