@random_walker
RT @dongyangzi: >We gave an AI agent an Apple Developer account, a Mac VM, and one task: build and publish an iOS app. It succeeded, at a c…
Viewing enriched Twitter post
RT @dongyangzi: >We gave an AI agent an Apple Developer account, a Mac VM, and one task: build and publish an iOS app. It succeeded, at a c…
{
"score": 0.34,
"score_components": {
"author": 0.09,
"engagement": 0.0,
"quality": 0.04000000000000001,
"source": 0.135,
"nlp": 0.05,
"recency": 0.025
},
"scored_at": "2026-04-16T19:01:02.915863",
"import_source": "api_import",
"source_tagged_at": "2026-04-16T19:01:02.915876",
"enriched": true,
"enriched_at": "2026-04-16T19:01:02.915878"
} {
"type": "tweet",
"id": "2044853464476299413",
"url": "https://x.com/random_walker/status/2044853464476299413",
"twitterUrl": "https://twitter.com/random_walker/status/2044853464476299413",
"text": "RT @dongyangzi: >We gave an AI agent an Apple Developer account, a Mac VM, and one task: build and publish an iOS app. It succeeded, at a c…",
"source": "Twitter for iPhone",
"retweetCount": 1,
"replyCount": 1,
"likeCount": 1,
"quoteCount": 0,
"viewCount": 29,
"createdAt": "Thu Apr 16 19:00:32 +0000 2026",
"lang": "en",
"bookmarkCount": 0,
"isReply": false,
"inReplyToId": null,
"conversationId": "2044853464476299413",
"displayTextRange": [
0,
143
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "random_walker",
"url": "https://x.com/random_walker",
"twitterUrl": "https://twitter.com/random_walker",
"id": "10834752",
"name": "Arvind Narayanan",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1650881612756942850/bZYjMyFU_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/10834752/1488663432",
"description": "",
"location": "Princeton, NJ",
"followers": 126537,
"following": 530,
"status": "",
"canDm": false,
"canMediaTag": false,
"createdAt": "Tue Dec 04 11:14:14 +0000 2007",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 24098,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 920,
"statusesCount": 13110,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2030988793751154890"
],
"profile_bio": {
"description": "Princeton CS prof and Director @PrincetonCITP. \nCoauthor of \"AI Snake Oil\" and \"AI as Normal Technology\". https://t.co/ZwebetjZ4n\nViews mine.",
"entities": {
"description": {
"urls": [
{
"display_url": "normaltech.ai",
"expanded_url": "https://www.normaltech.ai/",
"indices": [
106,
129
],
"url": "https://t.co/ZwebetjZ4n"
}
],
"user_mentions": [
{
"id_str": "",
"indices": [
31,
45
],
"name": "",
"screen_name": "PrincetonCITP"
}
]
},
"url": {
"urls": [
{
"display_url": "cs.princeton.edu/~arvindn/",
"expanded_url": "https://www.cs.princeton.edu/~arvindn/",
"indices": [
0,
23
],
"url": "https://t.co/px6fpS9QFq"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": [
{
"id_str": "294665333",
"indices": [
3,
14
],
"name": "Zi",
"screen_name": "dongyangzi"
}
]
},
"quoted_tweet": null,
"retweeted_tweet": {
"type": "tweet",
"id": "2044850284191699085",
"url": "https://x.com/dongyangzi/status/2044850284191699085",
"twitterUrl": "https://twitter.com/dongyangzi/status/2044850284191699085",
"text": ">We gave an AI agent an Apple Developer account, a Mac VM, and one task: build and publish an iOS app. It succeeded, at a cost of about $1,000.\n\nGreat research on what it takes for an agent to do real world work. Some interesting areas of improvements:",
"source": "Twitter for iPhone",
"retweetCount": 1,
"replyCount": 1,
"likeCount": 1,
"quoteCount": 0,
"viewCount": 29,
"createdAt": "Thu Apr 16 18:47:53 +0000 2026",
"lang": "en",
"bookmarkCount": 0,
"isReply": false,
"inReplyToId": null,
"conversationId": "2044850284191699085",
"displayTextRange": [
0,
255
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "dongyangzi",
"url": "https://x.com/dongyangzi",
"twitterUrl": "https://twitter.com/dongyangzi",
"id": "294665333",
"name": "Zi",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1989465989923422208/324f_4iI_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/294665333/1772644526",
"description": "",
"location": "San Francisco",
"followers": 78,
"following": 197,
"status": "",
"canDm": true,
"canMediaTag": true,
"createdAt": "Sat May 07 15:08:15 +0000 2011",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 35,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 12,
"statusesCount": 51,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [],
"profile_bio": {
"description": "computer use for windows | prev @Databricks @Samsara @Stanford",
"entities": {
"description": {
"user_mentions": [
{
"id_str": "",
"indices": [
32,
43
],
"name": "",
"screen_name": "Databricks"
},
{
"id_str": "",
"indices": [
44,
52
],
"name": "",
"screen_name": "Samsara"
},
{
"id_str": "",
"indices": [
53,
62
],
"name": "",
"screen_name": "Stanford"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": []
},
"quoted_tweet": {
"type": "tweet",
"id": "2044841045867778365",
"url": "https://x.com/random_walker/status/2044841045867778365",
"twitterUrl": "https://twitter.com/random_walker/status/2044841045867778365",
"text": "📢📢A double launch today! We’re releasing a paper analyzing the rapidly growing trend of “open-world evaluations” for measuring frontier AI capabilities. We’re also launching a new project, CRUX (Collaborative Research for Updating AI eXpectations), an effort to regularly conduct such evaluations ourselves.\n\nI think open-world evals are the most important development in AI evaluation over the past year. Our paper explains why we need them, what they can and can’t tell us, and how to do them well.\n\nIn CRUX #1, we tasked an agent with building and publishing a simple iOS app to the Apple App store. The paper has many “lessons from the trenches” from running this experiment. We hope you find it interesting! CRUX #2 will be about AI R&D automation.\n\nThe core team is @sayashk, @PKirgis, @steverab, Andrew Schwartz, and me. We’re delighted to have assembled an amazing group of collaborators, many of whom have conducted important open-world evaluations: @fly_upside_down, @RishiBommasani, @DubMagda, @ghadfield, @ahall_research, @sarahookr, @sethlazar, @snewmanpv, @DimitrisPapail, @shostekofsky, @hlntnr, and @CUdudec.\n\nPaper: https://t.co/M15jgh4PCP\nHTML version: https://t.co/iuVW7RAlr5\nCRUX website: https://t.co/g937gpS65j",
"source": "Twitter for iPhone",
"retweetCount": 9,
"replyCount": 1,
"likeCount": 25,
"quoteCount": 1,
"viewCount": 1595,
"createdAt": "Thu Apr 16 18:11:11 +0000 2026",
"lang": "en",
"bookmarkCount": 13,
"isReply": false,
"inReplyToId": null,
"conversationId": "2044841045867778365",
"displayTextRange": [
0,
271
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "random_walker",
"url": "https://x.com/random_walker",
"twitterUrl": "https://twitter.com/random_walker",
"id": "10834752",
"name": "Arvind Narayanan",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1650881612756942850/bZYjMyFU_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/10834752/1488663432",
"description": "",
"location": "Princeton, NJ",
"followers": 126537,
"following": 530,
"status": "",
"canDm": false,
"canMediaTag": false,
"createdAt": "Tue Dec 04 11:14:14 +0000 2007",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 24098,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 920,
"statusesCount": 13110,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2030988793751154890"
],
"profile_bio": {
"description": "Princeton CS prof and Director @PrincetonCITP. \nCoauthor of \"AI Snake Oil\" and \"AI as Normal Technology\". https://t.co/ZwebetjZ4n\nViews mine.",
"entities": {
"description": {
"urls": [
{
"display_url": "normaltech.ai",
"expanded_url": "https://www.normaltech.ai/",
"indices": [
106,
129
],
"url": "https://t.co/ZwebetjZ4n"
}
],
"user_mentions": [
{
"id_str": "",
"indices": [
31,
45
],
"name": "",
"screen_name": "PrincetonCITP"
}
]
},
"url": {
"urls": [
{
"display_url": "cs.princeton.edu/~arvindn/",
"expanded_url": "https://www.cs.princeton.edu/~arvindn/",
"indices": [
0,
23
],
"url": "https://t.co/px6fpS9QFq"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"allow_download_status": {
"allow_download": true
},
"display_url": "pic.twitter.com/KHNHmVxbM2",
"expanded_url": "https://twitter.com/random_walker/status/2044841045867778365/photo/1",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": []
},
"orig": {
"faces": []
}
},
"id_str": "2044840919237632000",
"indices": [
272,
295
],
"media_key": "3_2044840919237632000",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARxgu/tlV6AACgACHGC8GOEWUT0AAA==",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAABCgABHGC7+2VXoAAKAAIcYLwY4RZRPQAA",
"media_key": "3_2044840919237632000"
}
},
"media_url_https": "https://pbs.twimg.com/media/HGC7-2VXoAAlD6J.png",
"original_info": {
"focus_rects": [
{
"h": 999,
"w": 1784,
"x": 0,
"y": 0
},
{
"h": 1784,
"w": 1784,
"x": 0,
"y": 0
},
{
"h": 1854,
"w": 1626,
"x": 79,
"y": 0
},
{
"h": 1854,
"w": 927,
"x": 429,
"y": 0
},
{
"h": 1854,
"w": 1784,
"x": 0,
"y": 0
}
],
"height": 1854,
"width": 1784
},
"sizes": {
"large": {
"h": 1854,
"w": 1784
}
},
"type": "photo",
"url": "https://t.co/KHNHmVxbM2"
}
]
},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"urls": [
{
"display_url": "cruxevals.com/open-world-eva…",
"expanded_url": "https://cruxevals.com/open-world-evaluations.pdf",
"indices": [
1134,
1157
],
"url": "https://t.co/M15jgh4PCP"
},
{
"display_url": "normaltech.ai/p/open-world-e…",
"expanded_url": "https://www.normaltech.ai/p/open-world-evaluations-for-measuring",
"indices": [
1172,
1195
],
"url": "https://t.co/iuVW7RAlr5"
},
{
"display_url": "cruxevals.com",
"expanded_url": "https://cruxevals.com/",
"indices": [
1210,
1233
],
"url": "https://t.co/g937gpS65j"
}
],
"user_mentions": [
{
"id_str": "3084274082",
"indices": [
772,
780
],
"name": "Sayash Kapoor",
"screen_name": "sayashk"
},
{
"id_str": "1036066345547444225",
"indices": [
782,
790
],
"name": "Peter Kirgis",
"screen_name": "PKirgis"
},
{
"id_str": "138821636",
"indices": [
792,
801
],
"name": "Stephan Rabanser",
"screen_name": "steverab"
},
{
"id_str": "244504407",
"indices": [
960,
976
],
"name": "JJ Allaire",
"screen_name": "fly_upside_down"
},
{
"id_str": "895659037198393344",
"indices": [
978,
993
],
"name": "rishi",
"screen_name": "RishiBommasani"
},
{
"id_str": "804222097325166592",
"indices": [
995,
1004
],
"name": "Magda Dubois",
"screen_name": "DubMagda"
},
{
"id_str": "29931309",
"indices": [
1006,
1016
],
"name": "Gillian Hadfield",
"screen_name": "ghadfield"
},
{
"id_str": "1478820195586084864",
"indices": [
1018,
1033
],
"name": "Andy Hall",
"screen_name": "ahall_research"
},
{
"id_str": "731538535795163136",
"indices": [
1035,
1045
],
"name": "Sara Hooker",
"screen_name": "sarahookr"
},
{
"id_str": "351808995",
"indices": [
1047,
1057
],
"name": "Seth Lazar",
"screen_name": "sethlazar"
},
{
"id_str": "205486394",
"indices": [
1059,
1069
],
"name": "Steve Newman",
"screen_name": "snewmanpv"
},
{
"id_str": "573817445",
"indices": [
1071,
1086
],
"name": "Dimitris Papailiopoulos",
"screen_name": "DimitrisPapail"
},
{
"id_str": "863524048751427584",
"indices": [
1088,
1101
],
"name": "Shoshannah Tekofsky",
"screen_name": "shostekofsky"
},
{
"id_str": "2907985939",
"indices": [
1103,
1110
],
"name": "Helen Toner",
"screen_name": "hlntnr"
},
{
"id_str": "1404056967220432899",
"indices": [
1116,
1124
],
"name": "Cozmin Ududec",
"screen_name": "CUdudec"
}
]
},
"quoted_tweet": null,
"retweeted_tweet": null,
"isLimitedReply": false,
"communityInfo": null,
"article": null
},
"retweeted_tweet": null,
"isLimitedReply": false,
"communityInfo": null,
"article": null
},
"isLimitedReply": false,
"communityInfo": null,
"article": null
}