@winglian
Am I misunderstanding something is it okay to train on the benchmark data to claim SotA? https://t.co/gHIo5eRr7W
Viewing enriched Twitter post
Am I misunderstanding something is it okay to train on the benchmark data to claim SotA? https://t.co/gHIo5eRr7W
{
"media": [
{
"type": "photo",
"url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1963015442743886200/media_0.jpg?",
"filename": "media_0.jpg"
}
],
"processed_at": "2025-09-06T21:38:09.057724",
"pipeline_version": "2.0"
} {
"type": "tweet",
"id": "1963015442743886200",
"url": "https://x.com/winglian/status/1963015442743886200",
"twitterUrl": "https://twitter.com/winglian/status/1963015442743886200",
"text": "Am I misunderstanding something is it okay to train on the benchmark data to claim SotA? https://t.co/gHIo5eRr7W",
"source": "Twitter for iPhone",
"retweetCount": 7,
"replyCount": 18,
"likeCount": 287,
"quoteCount": 4,
"viewCount": 60141,
"createdAt": "Tue Sep 02 23:05:27 +0000 2025",
"lang": "en",
"bookmarkCount": 98,
"isReply": false,
"inReplyToId": null,
"conversationId": "1963015442743886200",
"displayTextRange": [
0,
89
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "winglian",
"url": "https://x.com/winglian",
"twitterUrl": "https://twitter.com/winglian",
"id": "24802509",
"name": "Wing Lian (caseus)",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1709990043664715778/FZmSZJaK_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/24802509/1403302292",
"description": "",
"location": "Annapolis, MD",
"followers": 10665,
"following": 1832,
"status": "",
"canDm": true,
"canMediaTag": true,
"createdAt": "Tue Mar 17 00:12:21 +0000 2009",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 3146,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 159,
"statusesCount": 2839,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [],
"profile_bio": {
"description": "@axolotl_ai OSS maintainer. Axolotl AI founder. AI/ML tinkerer. Building tools for everyone.",
"entities": {
"description": {
"user_mentions": [
{
"id_str": "0",
"indices": [
0,
11
],
"name": "",
"screen_name": "axolotl_ai"
}
]
},
"url": {
"urls": [
{
"display_url": "github.com/winglian/",
"expanded_url": "https://github.com/winglian/",
"indices": [
0,
23
],
"url": "https://t.co/Ym8RB0dslS"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"allow_download_status": {
"allow_download": true
},
"display_url": "pic.twitter.com/gHIo5eRr7W",
"expanded_url": "https://twitter.com/winglian/status/1963015442743886200/photo/1",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": [
{
"h": 43,
"w": 43,
"x": 447,
"y": 747
}
]
},
"orig": {
"faces": [
{
"h": 43,
"w": 43,
"x": 447,
"y": 747
}
]
}
},
"id_str": "1963015437077102593",
"indices": [
90,
113
],
"media_key": "3_1963015437077102593",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARs+CCO3lsABCgACGz4IJQlbAXgAAA==",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAABCgABGz4II7eWwAEKAAIbPgglCVsBeAAA",
"media_key": "3_1963015437077102593"
}
},
"media_url_https": "https://pbs.twimg.com/media/Gz4II7eWwAEe4Rj.jpg",
"original_info": {
"focus_rects": [
{
"h": 719,
"w": 1284,
"x": 0,
"y": 0
},
{
"h": 1111,
"w": 1111,
"x": 0,
"y": 0
},
{
"h": 1111,
"w": 975,
"x": 0,
"y": 0
},
{
"h": 1111,
"w": 556,
"x": 0,
"y": 0
},
{
"h": 1111,
"w": 1284,
"x": 0,
"y": 0
}
],
"height": 1111,
"width": 1284
},
"sizes": {
"large": {
"h": 1111,
"w": 1284
}
},
"type": "photo",
"url": "https://t.co/gHIo5eRr7W"
}
]
},
"card": null,
"place": {},
"entities": {},
"quoted_tweet": {
"type": "tweet",
"id": "1962954306078048297",
"url": "https://x.com/corbtt/status/1962954306078048297",
"twitterUrl": "https://twitter.com/corbtt/status/1962954306078048297",
"text": "๐จ Weโve just published a recipe to train a frontier-level deep research agent using RL.\n\nWith just 30 hours on an H200, any developer can now beat Sonnet-4 on DeepResearch Bench using open-source tools.\n\n(Thread ๐งต) https://t.co/Ul7htDkmPX",
"source": "Twitter for iPhone",
"retweetCount": 172,
"replyCount": 38,
"likeCount": 1322,
"quoteCount": 27,
"viewCount": 206700,
"createdAt": "Tue Sep 02 19:02:31 +0000 2025",
"lang": "en",
"bookmarkCount": 1579,
"isReply": false,
"inReplyToId": null,
"conversationId": "1962954306078048297",
"displayTextRange": [
0,
214
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "corbtt",
"url": "https://x.com/corbtt",
"twitterUrl": "https://twitter.com/corbtt",
"id": "823506858",
"name": "Kyle Corbitt",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1917706841238429696/Kd7qQjjd_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/823506858/1698191066",
"description": "",
"location": "Seattle, SF",
"followers": 16541,
"following": 259,
"status": "",
"canDm": true,
"canMediaTag": true,
"createdAt": "Fri Sep 14 15:44:30 +0000 2012",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 5305,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 217,
"statusesCount": 2338,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"1917269992363680054"
],
"profile_bio": {
"description": "Currently building @OpenPipeAI. Formerly @ycombinator, @google. I am always down to go on a quest.",
"entities": {
"description": {
"user_mentions": [
{
"id_str": "0",
"indices": [
19,
30
],
"name": "",
"screen_name": "OpenPipeAI"
},
{
"id_str": "0",
"indices": [
41,
53
],
"name": "",
"screen_name": "ycombinator"
},
{
"id_str": "0",
"indices": [
55,
62
],
"name": "",
"screen_name": "google"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"display_url": "pic.twitter.com/Ul7htDkmPX",
"expanded_url": "https://twitter.com/corbtt/status/1962954306078048297/photo/1",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {},
"orig": {}
},
"id_str": "1962954263807852544",
"indices": [
215,
238
],
"media_key": "3_1962954263807852544",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARs90IC0mwAACgACGz3QiowbACkAAA==",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAABCgABGz3QgLSbAAAKAAIbPdCKjBsAKQAA",
"media_key": "3_1962954263807852544"
}
},
"media_url_https": "https://pbs.twimg.com/media/Gz3QgLSbAAAVzA3.jpg",
"original_info": {
"focus_rects": [
{
"h": 885,
"w": 1580,
"x": 0,
"y": 0
},
{
"h": 1180,
"w": 1180,
"x": 200,
"y": 0
},
{
"h": 1180,
"w": 1035,
"x": 273,
"y": 0
},
{
"h": 1180,
"w": 590,
"x": 495,
"y": 0
},
{
"h": 1180,
"w": 1580,
"x": 0,
"y": 0
}
],
"height": 1180,
"width": 1580
},
"sizes": {
"large": {
"h": 1180,
"w": 1580
}
},
"type": "photo",
"url": "https://t.co/Ul7htDkmPX"
}
]
},
"card": null,
"place": {},
"entities": {},
"quoted_tweet": null,
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
},
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
}