🐦 Twitter Post Details

Viewing enriched Twitter post

@_jasonwei

Large language models are notoriously hard to evaluate because (1) they are highly multi-task, (2) they generate long completions, and (3) grading is subjective. After spending ~5 months rigorously working on how to do language model evals, this is my verdict: https://t.co/JCw9DwwghC

View on Twitter

📊 Media Metadata

{
  "media": [
    {
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1707102665321365793/media_0.jpg",
      "type": "photo",
      "original_url": "https://pbs.twimg.com/media/F7DYVTKaMAAuqMY.jpg",
      "download_date": "2025-08-13T06:02:13.062110",
      "stored_in_supabase": true
    }
  ],
  "conversion_date": "2025-08-13T00:40:26.353018",
  "format_converted": true,
  "original_structure": "had_media_only"
}

🔧 Raw API Response

{
  "user": {
    "created_at": "2020-10-22T02:22:58.000Z",
    "default_profile_image": false,
    "description": "ai researcher @openai",
    "fast_followers_count": 0,
    "favourites_count": 4249,
    "followers_count": 35072,
    "friends_count": 438,
    "has_custom_timelines": true,
    "is_translator": false,
    "listed_count": 577,
    "location": "SF",
    "media_count": 95,
    "name": "Jason Wei",
    "normal_followers_count": 35072,
    "possibly_sensitive": false,
    "profile_banner_url": "https://pbs.twimg.com/profile_banners/1319101874532978690/1683100754",
    "profile_image_url_https": "https://pbs.twimg.com/profile_images/1648926239389011971/kOJi1-5Z_normal.jpg",
    "screen_name": "_jasonwei",
    "statuses_count": 1028,
    "translator_type": "none",
    "url": "https://t.co/bUAfCJWu5U",
    "verified": false,
    "withheld_in_countries": [],
    "id_str": "1319101874532978690"
  },
  "id": "1707102665321365793",
  "conversation_id": "1707102665321365793",
  "full_text": "Large language models are notoriously hard to evaluate because (1) they are highly multi-task, (2) they generate long completions, and (3) grading is subjective. After spending ~5 months rigorously working on how to do language model evals, this is my verdict: https://t.co/JCw9DwwghC",
  "reply_count": 39,
  "retweet_count": 141,
  "favorite_count": 1521,
  "hashtags": [],
  "symbols": [],
  "user_mentions": [],
  "urls": [],
  "media": [
    {
      "media_url": "https://pbs.twimg.com/media/F7DYVTKaMAAuqMY.jpg",
      "type": "photo"
    }
  ],
  "url": "https://twitter.com/_jasonwei/status/1707102665321365793",
  "created_at": "2023-09-27T18:39:26.000Z",
  "#sort_index": "1707102665321365793",
  "view_count": 212065,
  "quote_count": 25,
  "is_quote_tweet": false,
  "is_retweet": false,
  "is_pinned": false,
  "is_truncated": false,
  "startUrl": "https://twitter.com/_jasonwei/status/1707102665321365793"
}