🐦 Twitter Post Details

Viewing enriched Twitter post

@Agentica_

It's easy to confuse Best@K vs Pass@K—and we've seen some misconceptions about our results. Our 59% on SWEBench-Verified is Pass@1 with Best@16, not Pass@8/16. Our Pass@8/16 is 67%/71%. So how did we achieve this? DeepSWE generates N candidate solutions. Then, another LLM… https://t.co/vikctUOMUh

View on Twitter

📊 Media Metadata

{
  "media": [
    {
      "type": "image",
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1940872568359342129/media_0.jpg?",
      "filename": "media_0.jpg"
    }
  ],
  "nlp": {
    "sentiment": "neutral",
    "topics": [
      "Best@K vs Pass@K",
      "SWEBench-Verified results",
      "DeepSWE"
    ],
    "entities": [],
    "summary": "The tweet clarifies misconceptions regarding the performance metrics of Best@K and Pass@K in SWEBench-Verified results.",
    "language": "en",
    "processed_at": "2025-08-07T09:58:04.583287"
  }
}

🔧 Raw API Response

{
  "data": {
    "threaded_conversation_with_injections_v2": {
      "instructions": [
        {
          "type": "TimelineClearCache"
        },
        {
          "type": "TimelineAddEntries",
          "entries": [
            {
              "entryId": "tweet-1940872568359342129",
              "sortIndex": "1953485791669780480",
              "content": {
                "entryType": "TimelineTimelineItem",
                "__typename": "TimelineTimelineItem",
                "itemContent": {
                  "itemType": "TimelineTweet",
                  "__typename": "TimelineTweet",
                  "tweet_results": {
                    "result": {
                      "__typename": "Tweet",
                      "rest_id": "1940872568359342129",
                      "has_birdwatch_notes": false,
                      "core": {
                        "user_results": {
                          "result": {
                            "__typename": "User",
                            "id": "VXNlcjoxODg0NDk3MjgxODcwOTI5OTIw",
                            "rest_id": "1884497281870929920",
                            "affiliates_highlighted_label": {},
                            "has_graduated_access": true,
                            "is_blue_verified": true,
                            "profile_image_shape": "Circle",
                            "legacy": {
                              "can_dm": false,
                              "can_media_tag": true,
                              "created_at": "Wed Jan 29 07:02:25 +0000 2025",
                              "default_profile": true,
                              "default_profile_image": false,
                              "description": "Building generalist agents that scale @BerkeleySky",
                              "entities": {
                                "description": {
                                  "urls": []
                                },
                                "url": {
                                  "urls": [
                                    {
                                      "display_url": "agentica-project.com",
                                      "expanded_url": "http://www.agentica-project.com",
                                      "url": "https://t.co/2Vk1O6WpCS",
                                      "indices": [
                                        0,
                                        23
                                      ]
                                    }
                                  ]
                                }
                              },
                              "fast_followers_count": 0,
                              "favourites_count": 125,
                              "followers_count": 2581,
                              "friends_count": 8,
                              "has_custom_timelines": false,
                              "is_translator": false,
                              "listed_count": 42,
                              "location": "San Francisco, CA",
                              "media_count": 24,
                              "name": "Agentica Project",
                              "normal_followers_count": 2581,
                              "pinned_tweet_ids_str": [
                                "1940478919532335538"
                              ],
                              "possibly_sensitive": false,
                              "profile_banner_url": "https://pbs.twimg.com/profile_banners/1884497281870929920/1738138474",
                              "profile_image_url_https": "https://pbs.twimg.com/profile_images/1884509075607805955/Ix2PoQbu_normal.jpg",
                              "profile_interstitial_type": "",
                              "screen_name": "Agentica_",
                              "statuses_count": 72,
                              "translator_type": "none",
                              "url": "https://t.co/2Vk1O6WpCS",
                              "verified": false,
                              "want_retweets": false,
                              "withheld_in_countries": []
                            },
                            "tipjar_settings": {}
                          }
                        }
                      },
                      "unmention_data": {},
                      "edit_control": {
                        "edit_tweet_ids": [
                          "1940872568359342129"
                        ],
                        "editable_until_msecs": "1751578654000",
                        "is_edit_eligible": true,
                        "edits_remaining": "5"
                      },
                      "is_translatable": false,
                      "views": {
                        "count": "23549",
                        "state": "EnabledWithCount"
                      },
                      "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
                      "note_tweet": {
                        "is_expandable": true,
                        "note_tweet_results": {
                          "result": {
                            "id": "Tm90ZVR3ZWV0OjE5NDA4NzI1NjgyMDQwOTk1ODQ=",
                            "text": "It's easy to confuse Best@K vs Pass@K—and we've seen some misconceptions about our results.  \n\nOur 59% on SWEBench-Verified is Pass@1 with Best@16, not Pass@8/16. Our Pass@8/16 is 67%/71%.  \n\nSo how did we achieve this? \n\nDeepSWE generates N candidate solutions. Then, another LLM (DeepSWE-Verifier) picks the best one to submit for evaluation. This is a classic form of test-time scaling (TTS) using Best@N.  \n\nEvaluating agents with TTS is common in SWEBench because there's no canonical agent design (unlike math or reasoning). Many top agents use multi-agent setups or agentic workflows (like Agentless), which naturally involve more LLM calls and test-time compute.   \n\nSWEBench ranks models by Pass@1, regardless of how many candidates were generated—as long as only one final solution is verified.   \n\nAmong open-weight models, the top prior entry was Skywork-32B-SWE + TTS (Best@8) at 47%. DeepSWE + TTS (Best@16) now leads with 59% and becomes SOTA in this category.   \n\nFigure below shows how DeepSWE's performance scale with TTS:",
                            "entity_set": {
                              "hashtags": [],
                              "symbols": [],
                              "urls": [],
                              "user_mentions": []
                            },
                            "richtext": {
                              "richtext_tags": [
                                {
                                  "from_index": 21,
                                  "to_index": 27,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 31,
                                  "to_index": 37,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 127,
                                  "to_index": 133,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 139,
                                  "to_index": 146,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 152,
                                  "to_index": 161,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 167,
                                  "to_index": 176,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 180,
                                  "to_index": 187,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                },
                                {
                                  "from_index": 937,
                                  "to_index": 940,
                                  "richtext_types": [
                                    "Bold"
                                  ]
                                }
                              ]
                            },
                            "media": {
                              "inline_media": []
                            }
                          }
                        }
                      },
                      "quoted_status_result": {
                        "result": {
                          "__typename": "Tweet",
                          "rest_id": "1940696309532709298",
                          "has_birdwatch_notes": false,
                          "core": {
                            "user_results": {
                              "result": {
                                "__typename": "User",
                                "id": "VXNlcjoxMTYzODAxNDUwOTY4OTk3ODg5",
                                "rest_id": "1163801450968997889",
                                "affiliates_highlighted_label": {},
                                "has_graduated_access": true,
                                "is_blue_verified": true,
                                "profile_image_shape": "Circle",
                                "legacy": {
                                  "can_dm": false,
                                  "can_media_tag": true,
                                  "created_at": "Tue Aug 20 13:14:30 +0000 2019",
                                  "default_profile": true,
                                  "default_profile_image": false,
                                  "description": "NLP Scientist | AutoAWQ Creator | Open-Source Contributor",
                                  "entities": {
                                    "description": {
                                      "urls": []
                                    },
                                    "url": {
                                      "urls": [
                                        {
                                          "display_url": "github.com/casper-hansen",
                                          "expanded_url": "https://github.com/casper-hansen",
                                          "url": "https://t.co/MInf4sYGZj",
                                          "indices": [
                                            0,
                                            23
                                          ]
                                        }
                                      ]
                                    }
                                  },
                                  "fast_followers_count": 0,
                                  "favourites_count": 2138,
                                  "followers_count": 9730,
                                  "friends_count": 446,
                                  "has_custom_timelines": false,
                                  "is_translator": false,
                                  "listed_count": 175,
                                  "location": "",
                                  "media_count": 480,
                                  "name": "Casper Hansen",
                                  "normal_followers_count": 9730,
                                  "pinned_tweet_ids_str": [
                                    "1915055132087320644"
                                  ],
                                  "possibly_sensitive": false,
                                  "profile_image_url_https": "https://pbs.twimg.com/profile_images/1463225585799467013/ndVxzFtj_normal.jpg",
                                  "profile_interstitial_type": "",
                                  "screen_name": "casper_hansen_",
                                  "statuses_count": 4013,
                                  "translator_type": "none",
                                  "url": "https://t.co/MInf4sYGZj",
                                  "verified": false,
                                  "want_retweets": false,
                                  "withheld_in_countries": []
                                },
                                "tipjar_settings": {}
                              }
                            }
                          },
                          "unmention_data": {},
                          "edit_control": {
                            "edit_tweet_ids": [
                              "1940696309532709298"
                            ],
                            "editable_until_msecs": "1751536631000",
                            "is_edit_eligible": false,
                            "edits_remaining": "5"
                          },
                          "is_translatable": false,
                          "views": {
                            "count": "36503",
                            "state": "EnabledWithCount"
                          },
                          "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
                          "quotedRefResult": {
                            "result": {
                              "__typename": "Tweet",
                              "rest_id": "1940495968383717553"
                            }
                          },
                          "legacy": {
                            "bookmark_count": 23,
                            "bookmarked": false,
                            "created_at": "Thu Jul 03 08:57:11 +0000 2025",
                            "conversation_id_str": "1940696309532709298",
                            "display_text_range": [
                              0,
                              180
                            ],
                            "entities": {
                              "hashtags": [],
                              "symbols": [],
                              "timestamps": [],
                              "urls": [],
                              "user_mentions": []
                            },
                            "favorite_count": 134,
                            "favorited": false,
                            "full_text": "Is it malpractice to report SOTA with pass@8 without using other models at pass@8 or just standard practice at this point? It's clearly not SOTA if it's behind Devstral in a pass@1",
                            "is_quote_status": true,
                            "lang": "en",
                            "quote_count": 2,
                            "quoted_status_id_str": "1940495968383717553",
                            "quoted_status_permalink": {
                              "url": "https://t.co/7lOb7XlypE",
                              "expanded": "https://twitter.com/togethercompute/status/1940495968383717553",
                              "display": "x.com/togethercomput…"
                            },
                            "reply_count": 11,
                            "retweet_count": 5,
                            "retweeted": false,
                            "user_id_str": "1163801450968997889",
                            "id_str": "1940696309532709298"
                          }
                        }
                      },
                      "legacy": {
                        "bookmark_count": 30,
                        "bookmarked": false,
                        "created_at": "Thu Jul 03 20:37:34 +0000 2025",
                        "conversation_id_str": "1940872568359342129",
                        "display_text_range": [
                          0,
                          281
                        ],
                        "entities": {
                          "hashtags": [],
                          "media": [
                            {
                              "display_url": "pic.x.com/vikctUOMUh",
                              "expanded_url": "https://x.com/Agentica_/status/1940872568359342129/photo/1",
                              "id_str": "1940872478072745984",
                              "indices": [
                                282,
                                305
                              ],
                              "media_key": "3_1940872478072745984",
                              "media_url_https": "https://pbs.twimg.com/media/Gu9dPFHbsAAtTPk.jpg",
                              "type": "photo",
                              "url": "https://t.co/vikctUOMUh",
                              "ext_media_availability": {
                                "status": "Available"
                              },
                              "features": {
                                "large": {
                                  "faces": []
                                },
                                "medium": {
                                  "faces": []
                                },
                                "small": {
                                  "faces": []
                                },
                                "orig": {
                                  "faces": []
                                }
                              },
                              "sizes": {
                                "large": {
                                  "h": 1521,
                                  "w": 2048,
                                  "resize": "fit"
                                },
                                "medium": {
                                  "h": 891,
                                  "w": 1200,
                                  "resize": "fit"
                                },
                                "small": {
                                  "h": 505,
                                  "w": 680,
                                  "resize": "fit"
                                },
                                "thumb": {
                                  "h": 150,
                                  "w": 150,
                                  "resize": "crop"
                                }
                              },
                              "original_info": {
                                "height": 3041,
                                "width": 4096,
                                "focus_rects": [
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 4096,
                                    "h": 2294
                                  },
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 3041,
                                    "h": 3041
                                  },
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 2668,
                                    "h": 3041
                                  },
                                  {
                                    "x": 364,
                                    "y": 0,
                                    "w": 1521,
                                    "h": 3041
                                  },
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 4096,
                                    "h": 3041
                                  }
                                ]
                              },
                              "allow_download_status": {
                                "allow_download": true
                              },
                              "media_results": {
                                "result": {
                                  "media_key": "3_1940872478072745984"
                                }
                              }
                            }
                          ],
                          "symbols": [],
                          "timestamps": [],
                          "urls": [],
                          "user_mentions": []
                        },
                        "extended_entities": {
                          "media": [
                            {
                              "display_url": "pic.x.com/vikctUOMUh",
                              "expanded_url": "https://x.com/Agentica_/status/1940872568359342129/photo/1",
                              "id_str": "1940872478072745984",
                              "indices": [
                                282,
                                305
                              ],
                              "media_key": "3_1940872478072745984",
                              "media_url_https": "https://pbs.twimg.com/media/Gu9dPFHbsAAtTPk.jpg",
                              "type": "photo",
                              "url": "https://t.co/vikctUOMUh",
                              "ext_media_availability": {
                                "status": "Available"
                              },
                              "features": {
                                "large": {
                                  "faces": []
                                },
                                "medium": {
                                  "faces": []
                                },
                                "small": {
                                  "faces": []
                                },
                                "orig": {
                                  "faces": []
                                }
                              },
                              "sizes": {
                                "large": {
                                  "h": 1521,
                                  "w": 2048,
                                  "resize": "fit"
                                },
                                "medium": {
                                  "h": 891,
                                  "w": 1200,
                                  "resize": "fit"
                                },
                                "small": {
                                  "h": 505,
                                  "w": 680,
                                  "resize": "fit"
                                },
                                "thumb": {
                                  "h": 150,
                                  "w": 150,
                                  "resize": "crop"
                                }
                              },
                              "original_info": {
                                "height": 3041,
                                "width": 4096,
                                "focus_rects": [
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 4096,
                                    "h": 2294
                                  },
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 3041,
                                    "h": 3041
                                  },
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 2668,
                                    "h": 3041
                                  },
                                  {
                                    "x": 364,
                                    "y": 0,
                                    "w": 1521,
                                    "h": 3041
                                  },
                                  {
                                    "x": 0,
                                    "y": 0,
                                    "w": 4096,
                                    "h": 3041
                                  }
                                ]
                              },
                              "allow_download_status": {
                                "allow_download": true
                              },
                              "media_results": {
                                "result": {
                                  "media_key": "3_1940872478072745984"
                                }
                              }
                            }
                          ]
                        },
                        "favorite_count": 52,
                        "favorited": false,
                        "full_text": "It's easy to confuse Best@K vs Pass@K—and we've seen some misconceptions about our results.  \n\nOur 59% on SWEBench-Verified is Pass@1 with Best@16, not Pass@8/16. Our Pass@8/16 is 67%/71%.  \n\nSo how did we achieve this? \n\nDeepSWE generates N candidate solutions. Then, another LLM… https://t.co/vikctUOMUh",
                        "is_quote_status": true,
                        "lang": "en",
                        "possibly_sensitive": false,
                        "possibly_sensitive_editable": true,
                        "quote_count": 5,
                        "quoted_status_id_str": "1940696309532709298",
                        "quoted_status_permalink": {
                          "url": "https://t.co/S86OGVeGvw",
                          "expanded": "https://twitter.com/casper_hansen_/status/1940696309532709298",
                          "display": "x.com/casper_hansen_…"
                        },
                        "reply_count": 1,
                        "retweet_count": 15,
                        "retweeted": false,
                        "user_id_str": "1884497281870929920",
                        "id_str": "1940872568359342129"
                      },
                      "quick_promote_eligibility": {
                        "eligibility": "IneligibleNotProfessional"
                      }
                    }
                  },
                  "tweetDisplayType": "Tweet"
                },
                "clientEventInfo": {
                  "component": "tweet",
                  "element": "tweet"
                }
              }
            },
            {
              "entryId": "conversationthread-1940891150824755530",
              "sortIndex": "1953485791669780470",
              "content": {
                "entryType": "TimelineTimelineModule",
                "__typename": "TimelineTimelineModule",
                "items": [
                  {
                    "entryId": "conversationthread-1940891150824755530-tweet-1940891150824755530",
                    "item": {
                      "itemContent": {
                        "itemType": "TimelineTweet",
                        "__typename": "TimelineTweet",
                        "tweet_results": {
                          "result": {
                            "__typename": "Tweet",
                            "rest_id": "1940891150824755530",
                            "has_birdwatch_notes": false,
                            "core": {
                              "user_results": {
                                "result": {
                                  "__typename": "User",
                                  "id": "VXNlcjoxNDEyNDk4MDkxMjcwNDU1MzAw",
                                  "rest_id": "1412498091270455300",
                                  "affiliates_highlighted_label": {},
                                  "has_graduated_access": true,
                                  "is_blue_verified": true,
                                  "profile_image_shape": "Circle",
                                  "legacy": {
                                    "can_dm": true,
                                    "can_media_tag": false,
                                    "created_at": "Tue Jul 06 19:46:16 +0000 2021",
                                    "default_profile": true,
                                    "default_profile_image": false,
                                    "description": "Ars longa \n\nSoftware for research engineers",
                                    "entities": {
                                      "description": {
                                        "urls": []
                                      },
                                      "url": {
                                        "urls": [
                                          {
                                            "display_url": "usesynth.ai",
                                            "expanded_url": "https://www.usesynth.ai",
                                            "url": "https://t.co/5ucKJb8SyC",
                                            "indices": [
                                              0,
                                              23
                                            ]
                                          }
                                        ]
                                      }
                                    },
                                    "fast_followers_count": 0,
                                    "favourites_count": 38235,
                                    "followers_count": 2112,
                                    "friends_count": 4042,
                                    "has_custom_timelines": true,
                                    "is_translator": false,
                                    "listed_count": 29,
                                    "location": "San Francisco",
                                    "media_count": 51,
                                    "name": "Josh",
                                    "normal_followers_count": 2112,
                                    "pinned_tweet_ids_str": [
                                      "1696359029847499118"
                                    ],
                                    "possibly_sensitive": false,
                                    "profile_banner_url": "https://pbs.twimg.com/profile_banners/1412498091270455300/1745524802",
                                    "profile_image_url_https": "https://pbs.twimg.com/profile_images/1920715043882012672/8SCReXAr_normal.jpg",
                                    "profile_interstitial_type": "",
                                    "screen_name": "JoshPurtell",
                                    "statuses_count": 2399,
                                    "translator_type": "none",
                                    "url": "https://t.co/5ucKJb8SyC",
                                    "verified": false,
                                    "want_retweets": false,
                                    "withheld_in_countries": []
                                  },
                                  "tipjar_settings": {}
                                }
                              }
                            },
                            "unmention_data": {},
                            "edit_control": {
                              "edit_tweet_ids": [
                                "1940891150824755530"
                              ],
                              "editable_until_msecs": "1751583085000",
                              "is_edit_eligible": false,
                              "edits_remaining": "5"
                            },
                            "is_translatable": false,
                            "views": {
                              "count": "620",
                              "state": "EnabledWithCount"
                            },
                            "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
                            "legacy": {
                              "bookmark_count": 0,
                              "bookmarked": false,
                              "created_at": "Thu Jul 03 21:51:25 +0000 2025",
                              "conversation_id_str": "1940872568359342129",
                              "display_text_range": [
                                11,
                                131
                              ],
                              "entities": {
                                "hashtags": [],
                                "symbols": [],
                                "timestamps": [],
                                "urls": [],
                                "user_mentions": [
                                  {
                                    "id_str": "1884497281870929920",
                                    "name": "Agentica Project",
                                    "screen_name": "Agentica_",
                                    "indices": [
                                      0,
                                      10
                                    ]
                                  }
                                ]
                              },
                              "favorite_count": 2,
                              "favorited": false,
                              "full_text": "@Agentica_ Open to sharing a distribution on compute / tokens?\n\nUltimately what matters is how many flops got thrown at the problem",
                              "in_reply_to_screen_name": "Agentica_",
                              "in_reply_to_status_id_str": "1940872568359342129",
                              "in_reply_to_user_id_str": "1884497281870929920",
                              "is_quote_status": false,
                              "lang": "en",
                              "quote_count": 1,
                              "reply_count": 0,
                              "retweet_count": 0,
                              "retweeted": false,
                              "user_id_str": "1412498091270455300",
                              "id_str": "1940891150824755530"
                            },
                            "quick_promote_eligibility": {
                              "eligibility": "IneligibleNotProfessional"
                            }
                          }
                        },
                        "tweetDisplayType": "Tweet"
                      },
                      "clientEventInfo": {
                        "component": "tweet",
                        "element": "tweet",
                        "details": {
                          "conversationDetails": {
                            "conversationSection": "HighQuality"
                          },
                          "timelinesDetails": {
                            "controllerData": "DAACDAAEDAABCgABAAAAAAAAAAEKAAIAAAAAAAAAAAAAAAA="
                          }
                        }
                      }
                    }
                  }
                ],
                "metadata": {
                  "conversationMetadata": {
                    "allTweetIds": [
                      "1940891150824755530"
                    ],
                    "enableDeduplication": true
                  }
                },
                "displayType": "VerticalConversation",
                "clientEventInfo": {
                  "component": "tweet",
                  "details": {
                    "conversationDetails": {
                      "conversationSection": "HighQuality"
                    },
                    "timelinesDetails": {
                      "controllerData": "DAACDAAEDAABCgABAAAAAAAAAAEKAAIAAAAAAAAAAAAAAAA="
                    }
                  }
                }
              }
            },
            {
              "entryId": "cursor-bottom-1953485791669780469",
              "sortIndex": "1953485791669780469",
              "content": {
                "entryType": "TimelineTimelineCursor",
                "__typename": "TimelineTimelineCursor",
                "value": "DAAFCgABGxws-j4___MLAAIAAAAwRW1QQzZ3QUFBZlEvZ0dKTjB2R3AvQUFBQUFJYTcyNDM2SnZSU2hydlhWRlhXOUF4CAADAAAAAgAA",
                "cursorType": "Bottom"
              }
            }
          ]
        },
        {
          "type": "TimelineTerminateTimeline",
          "direction": "Top"
        },
        {
          "type": "TimelineTerminateTimeline",
          "direction": "Bottom"
        }
      ],
      "metadata": {
        "scribeConfig": {
          "page": "ranked_replies"
        }
      }
    }
  }
}