🐦 Twitter Post Details

Viewing enriched Twitter post

@Francis_YAO_

Frontier models all have at least 100k context length, Gemini 1.5 has even 1m context. What about research and open source? Introducing Long Context Data Engineering, a data driven method achieving the first 128k context open source model matching GPT4-level Needle in a Haystack

📊 Media Metadata

{
  "media": [
    {
      "id": "",
      "type": "photo",
      "url": null,
      "media_url": "https://pbs.twimg.com/media/GGy6BD2XEAAr6lb.jpg",
      "media_url_https": null,
      "display_url": null,
      "expanded_url": null
    }
  ],
  "nlp": {
    "sentiment": "positive",
    "processed_at": "2025-08-06T12:53:26.982537"
  },
  "score": 1.0,
  "scored_at": "2025-08-09T13:46:07.546138",
  "import_source": "manual_curation_2024",
  "score_components": {
    "author": 0.09,
    "engagement": 0.139654580008829,
    "quality": 0.14,
    "source": 0.15,
    "nlp": 0.1,
    "recency": 0.01
  },
  "source_tagged_at": "2025-08-09T13:43:08.477283",
  "enriched": true,
  "enriched_at": "2025-08-09T13:43:08.477287",
  "original_structure": "had_media_only"
}

🔧 Raw API Response

{
  "user": {
    "created_at": "2016-06-10T16:57:03.000Z",
    "default_profile_image": false,
    "description": "PhD @EdinburghNLP on LLMs and Machine Reasoning. Ex. @Columbia @PKU1898 @MITIBMLab @allen_ai AGI has yet to come, so keep running",
    "fast_followers_count": 0,
    "favourites_count": 2140,
    "followers_count": 12234,
    "friends_count": 1658,
    "has_custom_timelines": false,
    "is_translator": false,
    "listed_count": 255,
    "location": "",
    "media_count": 41,
    "name": "Yao Fu",
    "normal_followers_count": 12234,
    "possibly_sensitive": false,
    "profile_banner_url": "https://pbs.twimg.com/profile_banners/741313237287768064/1650890183",
    "profile_image_url_https": "https://pbs.twimg.com/profile_images/1672654817297088512/CrHTKYgD_normal.jpg",
    "screen_name": "Francis_YAO_",
    "statuses_count": 688,
    "translator_type": "none",
    "url": "https://t.co/6K2Rtdvze3",
    "verified": true,
    "withheld_in_countries": [],
    "id_str": "741313237287768064"
  },
  "id": "1759986097365627054",
  "conversation_id": "1759986097365627054",
  "full_text": "Frontier models all have at least 100k context length, Gemini 1.5 has even 1m context. What about research and open source? \n\nIntroducing Long Context Data Engineering, a data driven method achieving the first 128k context open source model matching GPT4-level Needle in a Haystack",
  "reply_count": 8,
  "retweet_count": 72,
  "favorite_count": 476,
  "hashtags": [],
  "symbols": [],
  "user_mentions": [],
  "urls": [],
  "media": [
    {
      "media_url": "https://pbs.twimg.com/media/GGy6BD2XEAAr6lb.jpg",
      "type": "photo"
    }
  ],
  "url": "https://twitter.com/Francis_YAO_/status/1759986097365627054",
  "created_at": "2024-02-20T16:59:19.000Z",
  "#sort_index": "1759986097365627054",
  "view_count": 80544,
  "quote_count": 6,
  "is_quote_tweet": false,
  "is_retweet": false,
  "is_pinned": false,
  "is_truncated": true,
  "startUrl": "https://twitter.com/francis_yao_/status/1759986097365627054"
}