@arankomatsuzaki
Data Engineering for Scaling Language Models to 128K Context

- 500M to 5B tokens are enough to enable the model to retrieve information anywhere within the 128K context
- Domain balance and length upsampling are needed

repo: https://t.co/kxEaWcqa3l
abs: https://t.co/jvrKDntG7i
https://t.co/9Sna7v0H6e
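
To make the "length upsampling under domain balance" idea concrete, here is a minimal Python sketch: long documents are upsampled within each source, then the weights are renormalized per domain so each domain keeps its original share of the mixture. The field names (`domain`, `text`), the length threshold, and the upsampling factor are illustrative assumptions, not the paper's actual recipe.

```python
# Sketch: per-domain length upsampling with the domain mixture held fixed.
# LONG_THRESHOLD and UPSAMPLE_FACTOR are hypothetical knobs, not paper values.
import random
from collections import defaultdict

LONG_THRESHOLD = 100_000   # characters; stand-in for a token-length cutoff
UPSAMPLE_FACTOR = 5.0      # extra weight given to long documents

def build_weights(docs):
    """Upsample long documents *within* each domain, then renormalize so the
    overall domain mixture is unchanged (the domain-balance constraint)."""
    weights = [UPSAMPLE_FACTOR if len(d["text"]) >= LONG_THRESHOLD else 1.0
               for d in docs]
    # Total original count and total new weight per domain.
    domain_orig = defaultdict(float)
    domain_new = defaultdict(float)
    for d, w in zip(docs, weights):
        domain_orig[d["domain"]] += 1.0
        domain_new[d["domain"]] += w
    # Rescale so each domain's weight sum equals its original document count:
    # long docs gain probability mass only at the expense of short docs in
    # the *same* domain, never of other domains.
    return [w * domain_orig[d["domain"]] / domain_new[d["domain"]]
            for d, w in zip(docs, weights)]

def sample(docs, k):
    """Draw k documents according to the upsampled-but-balanced weights."""
    return random.choices(docs, weights=build_weights(docs), k=k)
```

The key design point this sketch illustrates: naive length upsampling over the whole corpus would skew toward domains that happen to contain long documents (e.g. books), whereas renormalizing per domain upsamples length while leaving the domain ratios intact.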