@clawhub-ma-tiezhu-6d9385f521
Extract and summarize web articles, 小红书, and Twitter content, then save clips to flomo via webhook or local markdown files with optional tags and summaries.
# content-clipper
Extract, summarize, and clip web content to note-taking services. Use when: (1) user shares a URL and wants a summary or key points extracted, (2) user wants to save/clip content to flomo, local markdown, or other note services, (3) user says "剪藏", "摘录", "存到flomo", "记到笔记", "clip this", "save to flomo", (4) user shares a 小红书/微信公众号/Twitter link and wants content extracted. Supports: web articles, 小红书 notes (text + video via screenshot), Twitter/X posts. Outputs to: flomo (webhook), local markdown files.
## Usage
### Clip to flomo
```bash
node <skill_dir>/scripts/clip.js --url "https://example.com" --target flomo
```
### Clip to local markdown
```bash
node <skill_dir>/scripts/clip.js --url "https://example.com" --target markdown --output /path/to/file.md
```
### Options
- `--url` — URL to extract content from
- `--target` — Output target: `flomo` or `markdown` (default: flomo)
- `--output` — Output file path (for markdown target)
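- `--summary` — Request a summary in addition to the extracted text (note: `scripts/clip.js` currently parses this flag but does not act on it)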
- `--tags` — Comma-separated tags to add
## Flomo Configuration
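Set the webhook URL via the `FLOMO_WEBHOOK` environment variable (takes precedence), or edit the `FLOMO_WEBHOOK` constant in `scripts/clip.js`.
Default webhook (Candy), used whenever `FLOMO_WEBHOOK` is not set: https://flomoapp.com/iwh/MTg4MTA/c6fceb66258d3cc5c527d82f283ba06a/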
## Notes
- Windows: uses `curl.exe --noproxy '*'` for flomo webhook (proxy bypass needed)
- 小红书: extracts text content; video notes use screenshot fallback
- Twitter/X: extracts tweet text and media URLs
FILE:package.json
{
"name": "content-clipper",
"version": "1.0.0",
"description": "Extract, summarize, and clip web content to flomo or local markdown. Supports web articles, 小红书, Twitter/X, and 微信公众号.",
"keywords": ["openclaw", "skill", "flomo", "clip", "summarize", "xiaohongshu", "twitter", "content"],
"author": "Ma-tiezhu",
"license": "MIT",
"main": "scripts/clip.js",
"engines": {
"node": ">=18"
}
}
FILE:scripts/clip.js
/**
* content-clipper — Extract web content and clip to flomo or markdown
* Usage: node clip.js --url <url> [--target flomo|markdown] [--output path] [--summary] [--tags tag1,tag2]
*/
const https = require('https');
const http = require('http');
const fs = require('fs');
const { execSync, execFileSync } = require('child_process');
const { URL } = require('url');
// Webhook endpoint that clips are POSTed to. The FLOMO_WEBHOOK environment
// variable wins; when it is unset or empty the hard-coded URL below is used.
// NOTE(review): the fallback embeds what looks like a personal webhook token, so
// clips made without FLOMO_WEBHOOK set are sent to that endpoint — confirm this
// default is intended before distributing the script.
const FLOMO_WEBHOOK = process.env.FLOMO_WEBHOOK || 'https://flomoapp.com/iwh/MTg4MTA/c6fceb66258d3cc5c527d82f283ba06a/';
/**
 * Parse the command-line flags in `process.argv` into an options object.
 *
 * Recognised flags: `--url <url>`, `--target <flomo|markdown>`,
 * `--output <path>`, `--summary` and `--tags <t1,t2,...>`. Unknown arguments
 * are ignored. A value flag that has no value (it is the last argument, or the
 * next argument is another `--flag`) is skipped, so the default stays in place
 * instead of being overwritten or crashing on `undefined.split`.
 *
 * @returns {{url?: string, target: string, output?: string, summary: boolean, tags: string[]}}
 *   Parsed options; `target` defaults to `'flomo'`, `summary` to `false`
 *   and `tags` to an empty array.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const opts = { target: 'flomo', summary: false, tags: [] };
  const valueFlags = ['--url', '--target', '--output', '--tags'];

  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    if (flag === '--summary') {
      opts.summary = true;
      continue;
    }
    if (!valueFlags.includes(flag)) {
      continue;
    }

    const value = args[i + 1];
    // Missing value: leave the default untouched and do not consume the next flag.
    if (value === undefined || value.startsWith('--')) {
      continue;
    }
    i++;

    if (flag === '--tags') {
      // Drop empty entries produced by input such as "a,,b" or a trailing comma.
      opts.tags = value.split(',').map((t) => t.trim()).filter(Boolean);
    } else {
      opts[flag.slice(2)] = value;
    }
  }
  return opts;
}
/**
 * Download a URL over HTTP(S) and resolve with the response body as text.
 *
 * 3xx responses that carry a `Location` header are followed, up to
 * `redirectsLeft` hops. Any other status code resolves with whatever body the
 * server sent (status codes are not turned into errors).
 *
 * @param {string} url - Absolute `http://` or `https://` URL.
 * @param {number} [redirectsLeft=5] - How many more redirects may be followed.
 * @returns {Promise<string>} The response body decoded as UTF-8.
 */
function fetch(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const mod = new URL(url).protocol === 'https:' ? https : http;
    const headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' };

    mod.get(url, { headers }, (res) => {
      const location = res.headers.location;
      if (res.statusCode >= 300 && res.statusCode < 400 && location) {
        // Discard the redirect body so the underlying socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while fetching ${url}`));
          return;
        }
        // Location may be relative; resolve it against the URL just requested.
        fetch(new URL(location, url).href, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      // Collect raw bytes and decode once at the end: decoding chunk by chunk
      // corrupts multi-byte characters that straddle a chunk boundary.
      const chunks = [];
      res.on('data', (c) => chunks.push(typeof c === 'string' ? Buffer.from(c) : c));
      res.on('error', reject);
      res.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
    }).on('error', reject);
  });
}
/**
 * Pull a title and readable plain text out of an HTML document.
 *
 * Script, style, nav, footer and header elements are dropped; the first
 * `<article>`, `<main>` or `class="...content..."` div is used as the body when
 * present, otherwise the whole remaining document. Tags are stripped, HTML
 * entities are decoded, every line is trimmed and runs of blank lines are
 * collapsed to a single blank line so paragraph breaks survive.
 *
 * @param {string} html - Raw HTML source; `null`/`undefined` is treated as ''.
 * @returns {{title: string, content: string}} Decoded page title ('' when the
 *   document has no `<title>`) and the cleaned text.
 */
function extractText(html) {
  const source = typeof html === 'string' ? html : String(html ?? '');

  // Named entities are looked up by name so no entity literal appears in this file.
  const named = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
  ]);
  // Single pass, so an escaped ampersand followed by "lt;" is decoded only once.
  const decodeEntities = (s) =>
    s.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (whole, ref) => {
      if (ref[0] !== '#') {
        const hit = named.get(ref.toLowerCase());
        return hit === undefined ? whole : hit;
      }
      const isHex = ref[1] === 'x' || ref[1] === 'X';
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // fromCodePoint handles astral characters; out-of-range references stay as written.
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });

  // Remove scripts, styles and page chrome.
  const text = source
    .replace(/<script[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?<\/style>/gi, '')
    .replace(/<nav[\s\S]*?<\/nav>/gi, '')
    .replace(/<footer[\s\S]*?<\/footer>/gi, '')
    .replace(/<header[\s\S]*?<\/header>/gi, '');

  // Extract the title from the untouched source.
  const titleMatch = source.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch ? decodeEntities(titleMatch[1]).replace(/\s+/g, ' ').trim() : '';

  // Prefer an explicit article/main/content container.
  const articleMatch = text.match(/<article[\s\S]*?>([\s\S]*?)<\/article>/i)
    || text.match(/<main[\s\S]*?>([\s\S]*?)<\/main>/i)
    || text.match(/<div[^>]*class="[^"]*content[^"]*"[^>]*>([\s\S]*?)<\/div>/i);
  const content = articleMatch ? articleMatch[1] : text;

  // Tags are stripped before entities are decoded so an escaped "<" is never mistaken for markup.
  const withoutTags = content
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/p>/gi, '\n\n')
    .replace(/<\/h[1-6]>/gi, '\n\n')
    .replace(/<li[^>]*>/gi, '• ')
    .replace(/<[^>]+>/g, '');

  const cleaned = decodeEntities(withoutTags)
    // Trim horizontal whitespace only: a plain \s here would also eat the
    // newlines and glue separate paragraphs together.
    .replace(/[^\S\n]+$/gm, '')
    .replace(/^[^\S\n]+/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  return { title, content: cleaned };
}
/**
 * POST a pre-serialised JSON body to a webhook using Node's https client.
 *
 * @param {string} webhook - Absolute https URL of the webhook.
 * @param {string} body - JSON text to send.
 * @returns {Promise<object|string>} Parsed JSON response, or the raw response
 *   text when it is not valid JSON.
 */
function postJsonOverHttps(webhook, body) {
  return new Promise((resolve, reject) => {
    const endpoint = new URL(webhook);
    const req = https.request({
      hostname: endpoint.hostname,
      port: endpoint.port || undefined,
      // Keep the query string; pathname alone would silently drop it.
      path: endpoint.pathname + endpoint.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body),
      },
      timeout: 15000,
    }, (res) => {
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (c) => { data += c; });
      res.on('end', () => {
        try { resolve(JSON.parse(data)); } catch { resolve(data); }
      });
    });
    req.on('timeout', () => req.destroy(new Error('flomo webhook request timed out')));
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}

/**
 * Send a clip to the flomo webhook configured in `FLOMO_WEBHOOK`.
 *
 * Tags are rendered as `#tag` words on a first line above the content. The
 * request is first attempted with `curl.exe --noproxy "*"` (proxy bypass);
 * when curl is unavailable or exits with an error, the request is sent with
 * Node's https client instead.
 *
 * @param {string} content - Text of the memo.
 * @param {string[]} tags - Tag names without the leading `#`; may be empty.
 * @returns {object|string|Promise<object|string>} The webhook response, parsed
 *   as JSON when possible. The curl path returns synchronously and the https
 *   path returns a Promise, so callers should `await` the result.
 */
function postToFlomo(content, tags) {
  const tagStr = (tags || []).filter(Boolean).map((t) => `#${t}`).join(' ');
  const body = JSON.stringify({ content: tagStr ? `${tagStr}\n\n${content}` : content });

  let raw;
  try {
    // execFileSync bypasses the shell, and the body travels over stdin, so
    // neither the URL nor the clipped text can be interpreted as shell syntax.
    raw = execFileSync(
      'curl.exe',
      ['--noproxy', '*', '-s', '-X', 'POST', FLOMO_WEBHOOK,
        '-H', 'Content-Type: application/json', '--data-binary', '@-'],
      { input: body, encoding: 'utf8', timeout: 15000 }
    );
  } catch (e) {
    // curl is missing or failed: fall back to Node https.
    return postJsonOverHttps(FLOMO_WEBHOOK, body);
  }

  // Parsed outside the try above on purpose: a non-JSON reply from a request
  // that already went through must not trigger a second POST via the fallback.
  try {
    return JSON.parse(raw);
  } catch {
    return raw;
  }
}
/**
 * Write a clip to a markdown file, replacing any existing file at that path.
 *
 * Layout: `# title`, a blockquote with the source URL and the clip time
 * (ISO 8601, UTC), an optional line of `#tag` words, a horizontal rule and
 * then the content.
 *
 * @param {string} title - Heading for the note.
 * @param {string} content - Body text of the note.
 * @param {string} url - Source URL recorded in the header.
 * @param {string[]} tags - Tag names without the leading `#`; may be empty.
 * @param {string} outputPath - Destination file path.
 * @returns {string} `outputPath`, unchanged.
 * @throws {Error} Whatever `fs.writeFileSync` throws (e.g. missing directory).
 */
function saveMarkdown(title, content, url, tags, outputPath) {
  const tagStr = (tags || []).filter(Boolean).map((t) => `#${t}`).join(' ');
  const lines = [
    `# ${title}`,
    '',
    `> Source: ${url}`,
    `> Clipped: ${new Date().toISOString()}`,
  ];
  if (tagStr) {
    lines.push(tagStr);
  }
  // The trailing '' makes the file end with a newline.
  lines.push('', '---', '', content, '');
  fs.writeFileSync(outputPath, lines.join('\n'), 'utf8');
  return outputPath;
}
/**
 * Command-line entry point: fetch the page named by `--url`, extract its text
 * and deliver it to the selected target.
 *
 * Progress and warnings go to stderr; a single JSON result line goes to stdout.
 * The process exits with status 1 when `--url` is missing or `--target` is not
 * one of `flomo` / `markdown`.
 *
 * @returns {Promise<void>} Settles once the clip has been delivered; rejects
 *   with whatever the fetch or delivery step throws.
 */
async function main() {
  const opts = parseArgs();
  if (!opts.url) {
    console.error('Usage: node clip.js --url <url> [--target flomo|markdown] [--output path] [--tags t1,t2]');
    process.exit(1);
  }
  // Reject unknown targets before doing any network work; previously they
  // fell through both branches and the script exited 0 without saving anything.
  if (opts.target !== 'flomo' && opts.target !== 'markdown') {
    console.error(`Unknown target "${opts.target}": expected "flomo" or "markdown"`);
    process.exit(1);
  }

  console.error(`Fetching: ${opts.url}`);
  const html = await fetch(opts.url);
  const { title, content } = extractText(html);
  if (!content || content.length < 50) {
    console.error('Warning: extracted content is very short, page may require JavaScript rendering');
  }

  if (opts.target === 'flomo') {
    // Memos are capped at 3000 characters of body text; an ellipsis marks the cut.
    const maxLength = 3000;
    const excerpt = content.length > maxLength ? `${content.slice(0, maxLength)}...` : content;
    const clipContent = `**${title}**\n\n${excerpt}\n\nSource: ${opts.url}`;
    console.error('Posting to flomo...');
    const result = await postToFlomo(clipContent, opts.tags);
    console.log(JSON.stringify({ ok: true, target: 'flomo', title, contentLength: content.length, result }));
  } else {
    // Without --output, use a timestamped file name in the working directory.
    const outPath = opts.output || `clip_${Date.now()}.md`;
    saveMarkdown(title, content, opts.url, opts.tags, outPath);
    console.log(JSON.stringify({ ok: true, target: 'markdown', title, contentLength: content.length, path: outPath }));
  }
}
// Script entry point: run the clipper and turn any rejection into a logged
// message plus a non-zero exit status.
main().catch((err) => {
  console.error('Error:', err.message);
  process.exit(1);
});