Ma-tiezhu

@clawhub-ma-tiezhu-6d9385f521
1 prompt
0 upvotes received
0 contributions
Joined 3 months ago
1 contribution in the last year
Aug
Sep
Oct
Nov
Dec
Jan
Feb
Mar
Apr
May
Jun
Jul
Less
Content Clipper
Skill
Extract and summarize web articles, 小红书, and Twitter content, then save clips to flomo via webhook or local markdown files with optional tags and summaries.
# content-clipper

Extract, summarize, and clip web content to note-taking services. Use when: (1) user shares a URL and wants a summary or key points extracted, (2) user wants to save/clip content to flomo, local markdown, or other note services, (3) user says "剪藏", "摘录", "存到flomo", "记到笔记", "clip this", "save to flomo", (4) user shares a 小红书/微信公众号/Twitter link and wants content extracted. Supports: web articles, 小红书 notes (text + video via screenshot), Twitter/X posts. Outputs to: flomo (webhook), local markdown files.

## Usage

### Clip to flomo
```bash
node <skill_dir>/scripts/clip.js --url "https://example.com" --target flomo
```

### Clip to local markdown
```bash
node <skill_dir>/scripts/clip.js --url "https://example.com" --target markdown --output /path/to/file.md
```

### Options
- `--url` — URL to extract content from
- `--target` — Output target: `flomo` or `markdown` (default: flomo)
- `--output` — Output file path (for markdown target)
- `--summary` — Also generate a summary (note: `scripts/clip.js` currently parses this flag but does not act on it)
- `--tags` — Comma-separated tags to add

## Flomo Configuration
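Set the webhook URL via the `FLOMO_WEBHOOK` environment variable (recommended), or edit the default in `scripts/clip.js`.
Default webhook (Candy): https://flomoapp.com/iwh/MTg4MTA/c6fceb66258d3cc5c527d82f283ba06a/
Note: this default points at one specific flomo inbox. If `FLOMO_WEBHOOK` is unset, clips are posted there, so set your own webhook before use.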

## Notes
- Windows: uses `curl.exe --noproxy '*'` for flomo webhook (proxy bypass needed)
- 小红书: extracts text content; video notes use screenshot fallback
- Twitter/X: extracts tweet text and media URLs

FILE:package.json
{
  "name": "content-clipper",
  "version": "1.0.0",
  "description": "Extract, summarize, and clip web content to flomo or local markdown. Supports web articles, 小红书, Twitter/X, and 微信公众号.",
  "keywords": ["openclaw", "skill", "flomo", "clip", "summarize", "xiaohongshu", "twitter", "content"],
  "author": "Ma-tiezhu",
  "license": "MIT",
  "main": "scripts/clip.js",
  "engines": {
    "node": ">=18"
  }
}

FILE:scripts/clip.js
/**
 * content-clipper — Extract web content and clip to flomo or markdown
 * Usage: node clip.js --url <url> [--target flomo|markdown] [--output path] [--summary] [--tags tag1,tag2]
 */
const https = require('https');
const http = require('http');
const fs = require('fs');
const { execSync } = require('child_process');
const { URL } = require('url');

// flomo webhook endpoint used by postToFlomo. The FLOMO_WEBHOOK environment
// variable takes precedence; when it is unset or empty the hard-coded URL is used.
// NOTE(review): the fallback URL appears to embed a personal webhook token —
// consider removing it from source control and requiring FLOMO_WEBHOOK instead.
const FLOMO_WEBHOOK = process.env.FLOMO_WEBHOOK || 'https://flomoapp.com/iwh/MTg4MTA/c6fceb66258d3cc5c527d82f283ba06a/';

/**
 * Parse command-line flags into an options object.
 *
 * Recognized flags: `--url <url>`, `--target <name>`, `--output <path>`,
 * `--summary` and `--tags <a,b,c>`. Unrecognized arguments are ignored.
 *
 * @param {string[]} [argv=process.argv.slice(2)] Arguments to parse; defaults
 *   to the current process arguments so existing zero-argument callers work.
 * @returns {{target: string, summary: boolean, tags: string[], url?: string, output?: string}}
 *   Parsed options. `target` defaults to `'flomo'`, `tags` to an empty array.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const opts = { target: 'flomo', summary: false, tags: [] };
  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case '--url':
        opts.url = argv[++i];
        break;
      case '--target':
        opts.target = argv[++i];
        break;
      case '--output':
        opts.output = argv[++i];
        break;
      case '--summary':
        opts.summary = true;
        break;
      case '--tags':
        // A trailing `--tags` with no value used to throw on `undefined.split`;
        // treat it as "no tags". Empty fragments ("a,,b", "a, ") are dropped so
        // they never turn into bare "#" tags.
        opts.tags = (argv[++i] ?? '')
          .split(',')
          .map((tag) => tag.trim())
          .filter(Boolean);
        break;
      default:
        break;
    }
  }
  return opts;
}

/**
 * Download a URL over http/https and resolve with the response body as text.
 *
 * Redirects (3xx with a `Location` header) are followed. Relative `Location`
 * values are resolved against the current URL, and the chain is capped so a
 * redirect loop rejects instead of recursing forever. Non-redirect responses
 * resolve with their body regardless of status code.
 *
 * @param {string} url Absolute http(s) URL to fetch.
 * @param {number} [redirectsLeft=5] How many more redirects may be followed.
 * @returns {Promise<string>} The response body decoded as UTF-8.
 */
function fetch(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const mod = url.startsWith('https') ? https : http;
    const options = {
      headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' },
    };

    const req = mod.get(url, options, (res) => {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect response so its socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while fetching ${url}`));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative ("/path"); resolve it against the current URL.
          nextUrl = new URL(headers.location, url).href;
        } catch (err) {
          reject(err);
          return;
        }
        fetch(nextUrl, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      // Decode through the stream so a multi-byte UTF-8 character split across
      // two chunks is not turned into replacement characters.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => resolve(data));
      res.on('error', reject);
    });

    req.on('error', reject);
  });
}

// Named entities the clipper understands; anything else is left untouched.
const NAMED_ENTITIES = new Map([
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
]);

/**
 * Decode the supported named entities plus decimal/hex numeric entities.
 * Runs as a single pass so already-decoded output is never decoded again
 * (e.g. "&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {string} text Text that may contain HTML entities.
 * @returns {string} Text with recognized entities replaced.
 */
function decodeEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match, body) => {
    if (body[0] !== '#') {
      return NAMED_ENTITIES.get(body) ?? match;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // fromCodePoint (unlike fromCharCode) handles astral characters such as
    // emoji; out-of-range values are left as written rather than throwing.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}

/**
 * Pull a title and readable plain text out of an HTML document.
 *
 * Script, style, nav, footer and header elements are dropped, then the first
 * `<article>`, `<main>` or `<div class="...content...">` is used as the body
 * (falling back to the whole document). Tags are stripped, entities decoded
 * and whitespace normalized while keeping blank lines between paragraphs.
 *
 * This is a regex heuristic, not an HTML parser: the `<div>` match in
 * particular stops at the first closing `</div>`.
 *
 * @param {string} html Raw HTML source.
 * @returns {{title: string, content: string}} Page title (empty string if no
 *   `<title>`) and the cleaned text content.
 */
function extractText(html) {
  // Remove scripts, styles, nav, footer, header
  const text = html
    .replace(/<script[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?<\/style>/gi, '')
    .replace(/<nav[\s\S]*?<\/nav>/gi, '')
    .replace(/<footer[\s\S]*?<\/footer>/gi, '')
    .replace(/<header[\s\S]*?<\/header>/gi, '');

  // Extract title
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch
    ? decodeEntities(titleMatch[1]).replace(/\s+/g, ' ').trim()
    : '';

  // Extract article or main content
  const articleMatch = text.match(/<article[\s\S]*?>([\s\S]*?)<\/article>/i)
    || text.match(/<main[\s\S]*?>([\s\S]*?)<\/main>/i)
    || text.match(/<div[^>]*class="[^"]*content[^"]*"[^>]*>([\s\S]*?)<\/div>/i);

  const content = articleMatch ? articleMatch[1] : text;

  // Turn block-level markup into line breaks, then strip the remaining tags.
  const stripped = content
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/p>/gi, '\n\n')
    .replace(/<\/h[1-6]>/gi, '\n\n')
    .replace(/<li[^>]*>/gi, '• ')
    .replace(/<[^>]+>/g, '');

  const cleaned = decodeEntities(stripped)
    // Trim each line's horizontal whitespace only. Using \s here would also
    // swallow the newlines and glue separate paragraphs together.
    .replace(/^[^\S\n]+|[^\S\n]+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  return { title, content: cleaned };
}

/**
 * Post a memo to flomo through the configured webhook.
 *
 * Tags are rendered as `#tag` and placed on the first line, followed by a
 * blank line and the content. On Windows the request goes through
 * `curl.exe --noproxy "*"` first (proxy bypass); if that fails, or on any
 * other platform, Node's https client is used.
 *
 * The JSON body is piped to curl on stdin instead of being spliced into the
 * command line, so clipped page text can never be interpreted by the shell.
 *
 * @param {string} content Memo text.
 * @param {string[]} tags Tag names without the leading `#`.
 * @returns {Promise<object|string>} The parsed JSON response, or the raw
 *   response text when it is not valid JSON.
 */
function postToFlomo(content, tags) {
  const tagStr = tags.map((t) => `#${t}`).join(' ');
  const body = JSON.stringify({ content: tagStr ? `${tagStr}\n\n${content}` : content });

  const parseResponse = (raw) => {
    try {
      return JSON.parse(raw);
    } catch {
      return raw;
    }
  };

  if (process.platform === 'win32') {
    let raw = null;
    try {
      raw = execSync(
        `curl.exe --noproxy "*" -s -X POST "${FLOMO_WEBHOOK}" -H "Content-Type: application/json" --data-binary @-`,
        { input: body, encoding: 'utf8', timeout: 15000 }
      );
    } catch {
      // curl missing or failed: fall through to the Node https client below.
      raw = null;
    }
    if (raw !== null) {
      // Parsing happens outside the try above so that a non-JSON reply from a
      // request that already succeeded does not trigger a second POST.
      return Promise.resolve(parseResponse(raw));
    }
  }

  return new Promise((resolve, reject) => {
    const req = https.request(
      new URL(FLOMO_WEBHOOK),
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(body),
        },
        timeout: 15000,
      },
      (res) => {
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => resolve(parseResponse(data)));
        res.on('error', reject);
      }
    );
    req.on('timeout', () => req.destroy(new Error('flomo webhook request timed out')));
    req.on('error', reject);
    req.write(body);
    req.end();
  });
}

/**
 * Write a clip to a local markdown file.
 *
 * Layout: `# title`, a quoted header with the source URL and the clip
 * timestamp (ISO 8601), an optional line of `#tag` entries, a horizontal
 * rule, then the content.
 *
 * @param {string} title Page title used as the top-level heading.
 * @param {string} content Clipped text.
 * @param {string} url Source URL recorded in the header.
 * @param {string[]} tags Tag names without the leading `#`; may be empty.
 * @param {string} outputPath File to write (overwritten if it exists).
 * @returns {string} `outputPath`, for convenience.
 */
function saveMarkdown(title, content, url, tags, outputPath) {
  const tagStr = tags.map((t) => `#${t}`).join(' ');
  const lines = [
    `# ${title}`,
    '',
    `> Source: ${url}`,
    `> Clipped: ${new Date().toISOString()}`,
    // The tag line is only emitted when there is at least one tag.
    ...(tagStr ? [tagStr] : []),
    '',
    '---',
    '',
    content,
    '',
  ];
  fs.writeFileSync(outputPath, lines.join('\n'), 'utf8');
  return outputPath;
}

/**
 * CLI entry point: fetch the URL, extract its text and send the clip to the
 * selected target.
 *
 * Progress and warnings go to stderr; a single JSON result line goes to
 * stdout. Exits with status 1 when `--url` is missing or `--target` is not
 * one of `flomo` / `markdown`.
 *
 * NOTE: `opts.summary` is parsed by parseArgs but is not acted on here.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs();
  if (!opts.url) {
    console.error('Usage: node clip.js --url <url> [--target flomo|markdown] [--output path] [--tags t1,t2]');
    process.exit(1);
  }
  // Reject unknown targets up front; previously they fetched the page and then
  // exited successfully without saving anything.
  if (opts.target !== 'flomo' && opts.target !== 'markdown') {
    console.error(`Unknown target "${opts.target}": expected "flomo" or "markdown"`);
    process.exit(1);
  }

  console.error(`Fetching: ${opts.url}`);
  const html = await fetch(opts.url);
  const { title, content } = extractText(html);

  if (!content || content.length < 50) {
    console.error('Warning: extracted content is very short, page may require JavaScript rendering');
  }

  if (opts.target === 'flomo') {
    // flomo memos are kept short: send at most 3000 characters and mark truncation.
    const excerpt = content.slice(0, 3000);
    const ellipsis = content.length > 3000 ? '...' : '';
    const clipContent = `**${title}**\n\n${excerpt}${ellipsis}\n\nSource: ${opts.url}`;

    console.error('Posting to flomo...');
    const result = await postToFlomo(clipContent, opts.tags);
    console.log(JSON.stringify({ ok: true, target: 'flomo', title, contentLength: content.length, result }));
  } else {
    const outPath = opts.output || `clip_${Date.now()}.md`;
    saveMarkdown(title, content, opts.url, opts.tags, outPath);
    console.log(JSON.stringify({ ok: true, target: 'markdown', title, contentLength: content.length, path: outPath }));
  }
}

// Run the CLI; report any failure on stderr and exit with a non-zero status.
main().catch((err) => {
  console.error('Error:', err.message);
  process.exit(1);
});
ClawHub Research Writing+2
M@clawhub-ma-tiezhu-6d9385f521