@random_walker
RT @steverab: In our paper "Towards a Science of AI Agent Reliability" we put numbers on the capability-reliability gap. Now we're showing…
Viewing enriched Twitter post
RT @steverab: In our paper "Towards a Science of AI Agent Reliability" we put numbers on the capability-reliability gap. Now we're showing…
{
"score": 0.36,
"score_components": {
"author": 0.09,
"engagement": 0.0,
"quality": 0.06000000000000001,
"source": 0.135,
"nlp": 0.05,
"recency": 0.025
},
"scored_at": "2026-03-17T15:02:17.741413",
"import_source": "api_import",
"source_tagged_at": "2026-03-17T15:02:17.741424",
"enriched": true,
"enriched_at": "2026-03-17T15:02:17.741425"
} {
"type": "tweet",
"id": "2033920948861030411",
"url": "https://x.com/random_walker/status/2033920948861030411",
"twitterUrl": "https://twitter.com/random_walker/status/2033920948861030411",
"text": "RT @steverab: In our paper \"Towards a Science of AI Agent Reliability\" we put numbers on the capability-reliability gap. Now we're showing…",
"source": "Twitter for iPhone",
"retweetCount": 2,
"replyCount": 1,
"likeCount": 8,
"quoteCount": 1,
"viewCount": 574,
"createdAt": "Tue Mar 17 14:58:37 +0000 2026",
"lang": "en",
"bookmarkCount": 4,
"isReply": false,
"inReplyToId": null,
"conversationId": "2033920948861030411",
"displayTextRange": [
0,
139
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "random_walker",
"url": "https://x.com/random_walker",
"twitterUrl": "https://twitter.com/random_walker",
"id": "10834752",
"name": "Arvind Narayanan",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1650881612756942850/bZYjMyFU_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/10834752/1488663432",
"description": "",
"location": "Princeton, NJ",
"followers": 126534,
"following": 524,
"status": "",
"canDm": false,
"canMediaTag": false,
"createdAt": "Tue Dec 04 11:14:14 +0000 2007",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 23702,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 917,
"statusesCount": 13075,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2030988793751154890"
],
"profile_bio": {
"description": "Princeton CS prof and Director @PrincetonCITP. \nCoauthor of \"AI Snake Oil\" and \"AI as Normal Technology\". https://t.co/ZwebetjZ4n\nViews mine.",
"entities": {
"description": {
"hashtags": [],
"symbols": [],
"urls": [
{
"display_url": "normaltech.ai",
"expanded_url": "https://www.normaltech.ai/",
"indices": [
106,
129
],
"url": "https://t.co/ZwebetjZ4n"
}
],
"user_mentions": [
{
"id_str": "0",
"indices": [
31,
45
],
"name": "",
"screen_name": "PrincetonCITP"
}
]
},
"url": {
"urls": [
{
"display_url": "cs.princeton.edu/~arvindn/",
"expanded_url": "https://www.cs.princeton.edu/~arvindn/",
"indices": [
0,
23
],
"url": "https://t.co/px6fpS9QFq"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": [
{
"id_str": "138821636",
"indices": [
3,
12
],
"name": "Stephan Rabanser",
"screen_name": "steverab"
}
]
},
"quoted_tweet": null,
"retweeted_tweet": {
"type": "tweet",
"id": "2033906489350398178",
"url": "https://x.com/steverab/status/2033906489350398178",
"twitterUrl": "https://twitter.com/steverab/status/2033906489350398178",
"text": "In our paper \"Towards a Science of AI Agent Reliability\" we put numbers on the capability-reliability gap. Now we're showing what's behind them! \n\nWe conducted an extensive analysis of failures on GAIA across Claude Opus 4.5, Gemini 2.5 Pro, and GPT 5.4.\n\nHere's what we found ⬇️ https://t.co/GkdAxk0wDO",
"source": "Twitter for iPhone",
"retweetCount": 2,
"replyCount": 1,
"likeCount": 8,
"quoteCount": 1,
"viewCount": 574,
"createdAt": "Tue Mar 17 14:01:09 +0000 2026",
"lang": "en",
"bookmarkCount": 4,
"isReply": false,
"inReplyToId": null,
"conversationId": "2033906489350398178",
"displayTextRange": [
0,
279
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "steverab",
"url": "https://x.com/steverab",
"twitterUrl": "https://twitter.com/steverab",
"id": "138821636",
"name": "Stephan Rabanser",
"isVerified": false,
"isBlueVerified": false,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1928214170547159040/VekssmRX_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/138821636/1760330288",
"description": "",
"location": "Princeton, NJ",
"followers": 576,
"following": 376,
"status": "",
"canDm": true,
"canMediaTag": false,
"createdAt": "Fri Apr 30 18:05:23 +0000 2010",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 470,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 1037,
"statusesCount": 10094,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2026383575080108436"
],
"profile_bio": {
"description": "Postdoctoral Researcher @Princeton. Reliable, safe, trustworthy machine learning. Previously: @UofT @VectorInst @TU_Muenchen @Google @awscloud",
"entities": {
"description": {
"hashtags": [],
"symbols": [],
"urls": [],
"user_mentions": [
{
"id_str": "0",
"indices": [
24,
34
],
"name": "",
"screen_name": "Princeton"
},
{
"id_str": "0",
"indices": [
94,
99
],
"name": "",
"screen_name": "UofT"
},
{
"id_str": "0",
"indices": [
100,
111
],
"name": "",
"screen_name": "VectorInst"
},
{
"id_str": "0",
"indices": [
112,
124
],
"name": "",
"screen_name": "TU_Muenchen"
},
{
"id_str": "0",
"indices": [
125,
132
],
"name": "",
"screen_name": "Google"
},
{
"id_str": "0",
"indices": [
133,
142
],
"name": "",
"screen_name": "awscloud"
}
]
},
"url": {
"urls": [
{
"display_url": "rabanser.dev",
"expanded_url": "https://rabanser.dev",
"indices": [
0,
23
],
"url": "https://t.co/cNNItOKNEM"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"allow_download_status": {
"allow_download": true
},
"display_url": "pic.twitter.com/GkdAxk0wDO",
"expanded_url": "https://twitter.com/steverab/status/2033906489350398178/photo/1",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": [
{
"h": 121,
"w": 121,
"x": 133,
"y": 219
}
]
},
"orig": {
"faces": [
{
"h": 167,
"w": 167,
"x": 183,
"y": 301
}
]
}
},
"id_str": "2033896512049074176",
"indices": [
280,
303
],
"media_key": "3_2033896512049074176",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARw52hoGl1AACgACHDnjLQwXoOIAAA==",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAABCgABHDnaGgaXUAAKAAIcOeMtDBeg4gAA",
"media_key": "3_2033896512049074176"
}
},
"media_url_https": "https://pbs.twimg.com/media/HDnaGgaXUAA76f8.png",
"original_info": {
"focus_rects": [
{
"h": 1498,
"w": 2675,
"x": 131,
"y": 0
},
{
"h": 1498,
"w": 1498,
"x": 1003,
"y": 0
},
{
"h": 1498,
"w": 1314,
"x": 1095,
"y": 0
},
{
"h": 1498,
"w": 749,
"x": 1378,
"y": 0
},
{
"h": 1498,
"w": 2806,
"x": 0,
"y": 0
}
],
"height": 1498,
"width": 2806
},
"sizes": {
"large": {
"h": 1093,
"w": 2048
}
},
"type": "photo",
"url": "https://t.co/GkdAxk0wDO"
}
]
},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": []
},
"quoted_tweet": null,
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
},
"isLimitedReply": false,
"article": null
}