🐦 Twitter Post Details

Viewing enriched Twitter post

@HKydlicek

Oh shit, it seems like all the HF Research team pretraining data has been accidentally leaked to the public. The web, PDFs, and synthetic datasets are expode on hf FineData org... Apparently, an intern used CC to push the data with private=False. https://t.co/gUcaKQBzxi

Media 1

📊 Media Metadata

{
  "media": [
    {
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/2039052059484287299/media_0.jpg",
      "media_url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/2039052059484287299/media_0.jpg",
      "type": "photo",
      "filename": "media_0.jpg"
    }
  ],
  "processed_at": "2026-03-31T22:06:43.615037",
  "pipeline_version": "2.0"
}

🔧 Raw API Response

{
  "type": "tweet",
  "id": "2039052059484287299",
  "url": "https://x.com/HKydlicek/status/2039052059484287299",
  "twitterUrl": "https://twitter.com/HKydlicek/status/2039052059484287299",
  "text": "Oh shit, it seems like all the HF Research team pretraining data has been accidentally leaked to the public. The web, PDFs, and synthetic datasets are expode on hf FineData org...\n\nApparently, an intern used CC to push the data with private=False. https://t.co/gUcaKQBzxi",
  "source": "Twitter for iPhone",
  "retweetCount": 20,
  "replyCount": 12,
  "likeCount": 331,
  "quoteCount": 10,
  "viewCount": 28524,
  "createdAt": "Tue Mar 31 18:47:49 +0000 2026",
  "lang": "en",
  "bookmarkCount": 164,
  "isReply": false,
  "inReplyToId": null,
  "conversationId": "2039052059484287299",
  "displayTextRange": [
    0,
    247
  ],
  "inReplyToUserId": null,
  "inReplyToUsername": null,
  "author": {
    "type": "user",
    "userName": "HKydlicek",
    "url": "https://x.com/HKydlicek",
    "twitterUrl": "https://twitter.com/HKydlicek",
    "id": "1470207594727940099",
    "name": "Hynek Kydlíček",
    "isVerified": false,
    "isBlueVerified": false,
    "verifiedType": null,
    "profilePicture": "https://pbs.twimg.com/profile_images/2001409107232763904/TDbrpdja_normal.jpg",
    "coverPicture": "",
    "description": "",
    "location": "Czech Republic",
    "followers": 1554,
    "following": 486,
    "status": "",
    "canDm": true,
    "canMediaTag": true,
    "createdAt": "Mon Dec 13 01:43:13 +0000 2021",
    "entities": {
      "description": {
        "urls": []
      },
      "url": {}
    },
    "fastFollowersCount": 0,
    "favouritesCount": 743,
    "hasCustomTimelines": true,
    "isTranslator": false,
    "mediaCount": 184,
    "statusesCount": 805,
    "withheldInCountries": [],
    "affiliatesHighlightedLabel": {},
    "possiblySensitive": false,
    "pinnedTweetIds": [
      "1964584936524124645"
    ],
    "profile_bio": {
      "description": "Pre-training data @huggingface 🤗\nPrague, CZ\n🇪🇺 eu/acc",
      "entities": {
        "description": {
          "hashtags": [],
          "symbols": [],
          "urls": [],
          "user_mentions": [
            {
              "id_str": "0",
              "indices": [
                18,
                30
              ],
              "name": "",
              "screen_name": "huggingface"
            }
          ]
        }
      }
    },
    "isAutomated": false,
    "automatedBy": null
  },
  "extendedEntities": {
    "media": [
      {
        "allow_download_status": {
          "allow_download": true
        },
        "display_url": "pic.twitter.com/gUcaKQBzxi",
        "expanded_url": "https://twitter.com/HKydlicek/status/2039052059484287299/photo/1",
        "ext_media_availability": {
          "status": "Available"
        },
        "features": {
          "large": {
            "faces": []
          },
          "orig": {
            "faces": []
          }
        },
        "id_str": "2039051370792468480",
        "indices": [
          248,
          271
        ],
        "media_key": "3_2039051370792468480",
        "media_results": {
          "id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARxMKmsa28AACgACHEwrC3QW8UMAAA==",
          "result": {
            "__typename": "ApiMedia",
            "id": "QXBpTWVkaWE6DAABCgABHEwqaxrbwAAKAAIcTCsLdBbxQwAA",
            "media_key": "3_2039051370792468480"
          }
        },
        "media_url_https": "https://pbs.twimg.com/media/HEwqaxrbwAA6rRf.jpg",
        "original_info": {
          "focus_rects": [
            {
              "h": 1203,
              "w": 2148,
              "x": 0,
              "y": 419
            },
            {
              "h": 1968,
              "w": 1968,
              "x": 0,
              "y": 0
            },
            {
              "h": 1968,
              "w": 1726,
              "x": 0,
              "y": 0
            },
            {
              "h": 1968,
              "w": 984,
              "x": 0,
              "y": 0
            },
            {
              "h": 1968,
              "w": 2148,
              "x": 0,
              "y": 0
            }
          ],
          "height": 1968,
          "width": 2148
        },
        "sizes": {
          "large": {
            "h": 1876,
            "w": 2048
          }
        },
        "type": "photo",
        "url": "https://t.co/gUcaKQBzxi"
      }
    ]
  },
  "card": null,
  "place": {},
  "entities": {
    "hashtags": [],
    "symbols": [],
    "timestamps": [],
    "urls": [],
    "user_mentions": []
  },
  "quoted_tweet": null,
  "retweeted_tweet": null,
  "isLimitedReply": false,
  "article": null
}