🐦 Twitter Post Details

Viewing enriched Twitter post

@lhoestq

My new app is out !! ✨The Common Crawl Pipeline Creator ✨ Create your pipeline easily: ✔Run Text Extraction✂️ ✔Define Language Filters🌐 ✔Customize text quality💯 ✔See Live Results👀 ✔Get Python code 🐍 Based on famous LLM research like Gopher, C4 or FineWeb https://t.co/kEjphJ8Y3x

📊 Media Metadata

{
  "media": [
    {
      "id": "",
      "type": "video",
      "url": null,
      "media_url": "https://pbs.twimg.com/ext_tw_video_thumb/1845847655336833041/pu/img/vlSFSDz2EQfxwiHT.jpg",
      "media_url_https": null,
      "display_url": null,
      "expanded_url": null
    }
  ],
  "nlp": {
    "sentiment": "positive",
    "processed_at": "2025-08-06T12:58:12.623957"
  },
  "score": 1.0,
  "score_components": {
    "author": 0.09,
    "engagement": 0.11006985621602257,
    "quality": 0.16000000000000003,
    "source": 0.09,
    "nlp": 0.1,
    "recency": 0.020000000000000004
  },
  "scored_at": "2025-08-09T13:46:07.547972",
  "import_source": "network_archive_import",
  "source_tagged_at": "2025-08-09T13:43:16.934972",
  "enriched": true,
  "enriched_at": "2025-08-09T13:43:16.934975",
  "links_checked": true,
  "checked_at": "2025-08-10T10:32:39.217574",
  "original_structure": "had_media_only"
}

🔧 Raw API Response

{
  "user": {
    "created_at": "2013-09-09T14:26:34.000Z",
    "default_profile_image": false,
    "description": "Datasets @huggingface | Open Source + HF Dataset Hub",
    "fast_followers_count": 0,
    "favourites_count": 3817,
    "followers_count": 3390,
    "friends_count": 263,
    "has_custom_timelines": false,
    "is_translator": false,
    "listed_count": 92,
    "location": "",
    "media_count": 150,
    "name": "Quentin Lhoest 🤗",
    "normal_followers_count": 3390,
    "possibly_sensitive": false,
    "profile_banner_url": "https://pbs.twimg.com/profile_banners/1846655912/1732547744",
    "profile_image_url_https": "https://pbs.twimg.com/profile_images/1252271352054194180/wWtnTVdn_normal.png",
    "screen_name": "lhoestq",
    "statuses_count": 1542,
    "translator_type": "none",
    "url": "https://t.co/dDcQiCbZgG",
    "verified": false,
    "withheld_in_countries": [],
    "id_str": "1846655912"
  },
  "id": "1845848814197837880",
  "conversation_id": "1845848814197837880",
  "full_text": "My new app is out !!\n✨The Common Crawl Pipeline Creator ✨\n\nCreate your pipeline easily:\n\n✔Run Text Extraction✂️\n✔Define Language Filters🌐\n✔Customize text quality💯\n✔See Live Results👀\n✔Get Python code 🐍\n\nBased on famous LLM research like Gopher, C4 or FineWeb https://t.co/kEjphJ8Y3x",
  "reply_count": 5,
  "retweet_count": 25,
  "favorite_count": 108,
  "hashtags": [],
  "symbols": [],
  "user_mentions": [],
  "urls": [],
  "media": [
    {
      "media_url": "https://pbs.twimg.com/ext_tw_video_thumb/1845847655336833041/pu/img/vlSFSDz2EQfxwiHT.jpg",
      "type": "video",
      "video_url": "https://video.twimg.com/ext_tw_video/1845847655336833041/pu/vid/avc1/566x608/F7GdmOBKMqgo3dMm.mp4?tag=12"
    }
  ],
  "url": "https://twitter.com/lhoestq/status/1845848814197837880",
  "created_at": "2024-10-14T15:27:06.000Z",
  "#sort_index": "1845848814197837880",
  "view_count": 14692,
  "quote_count": 3,
  "is_quote_tweet": false,
  "is_retweet": false,
  "is_pinned": false,
  "is_truncated": false,
  "startUrl": "https://x.com/qlhoest/status/1845848814197837880"
}