🐦 Twitter Post Details

Viewing enriched Twitter post

@brian_ichter

How do you get zero-shot robot control from VLMs? Introducing Prompting with Iterative Visual Optimization, or PIVOT! It casts spatial reasoning tasks as VQA by visually annotating images, which VLMs can understand and answer. Project website: https://t.co/HBO4WHPJk6 https://t.co/g01YqRzzsa

Media 1

📊 Media Metadata

{
  "data": [
    {
      "id": "",
      "type": "photo",
      "url": null,
      "media_url": "https://pbs.twimg.com/media/GGLBj0oaYAAhKAO.jpg",
      "media_url_https": null,
      "display_url": null,
      "expanded_url": null
    }
  ],
  "score": 0.79,
  "scored_at": "2025-08-09T13:47:19.525441",
  "import_source": "manual_curation_2024",
  "media": [
    {
      "type": "photo",
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1757179635350347800/media_0.jpg?",
      "filename": "media_0.jpg",
      "original_url": "https://pbs.twimg.com/media/GGLBj0oaYAAhKAO.jpg"
    }
  ],
  "storage_migrated": true
}

🔧 Raw API Response

{
  "user": {
    "created_at": "2014-11-19T06:33:49.000Z",
    "default_profile_image": false,
    "description": "Research Scientist at Google Brain, interested in robotics and AI",
    "fast_followers_count": 0,
    "favourites_count": 268,
    "followers_count": 555,
    "friends_count": 157,
    "has_custom_timelines": false,
    "is_translator": false,
    "listed_count": 15,
    "location": "San Francisco, CA",
    "media_count": 9,
    "name": "Brian Ichter",
    "normal_followers_count": 555,
    "possibly_sensitive": false,
    "profile_image_url_https": "https://pbs.twimg.com/profile_images/1628816098966618115/Mx9rcvPU_normal.jpg",
    "screen_name": "brian_ichter",
    "statuses_count": 22,
    "translator_type": "none",
    "verified": false,
    "withheld_in_countries": [],
    "id_str": "2904179526"
  },
  "id": "1757179635350347800",
  "conversation_id": "1757179635350347800",
  "full_text": "How do you get zero-shot robot control from VLMs?\n\nIntroducing Prompting with Iterative Visual Optimization, or PIVOT! It casts spatial reasoning tasks as VQA by visually annotating images, which VLMs can understand and answer.\nProject website: https://t.co/HBO4WHPJk6 https://t.co/g01YqRzzsa",
  "reply_count": 4,
  "retweet_count": 21,
  "favorite_count": 90,
  "hashtags": [],
  "symbols": [],
  "user_mentions": [],
  "urls": [
    {
      "url": "https://t.co/HBO4WHPJk6",
      "expanded_url": "https://pivot-prompt.github.io/",
      "display_url": "pivot-prompt.github.io"
    }
  ],
  "media": [
    {
      "media_url": "https://pbs.twimg.com/media/GGLBj0oaYAAhKAO.jpg",
      "type": "photo"
    }
  ],
  "url": "https://twitter.com/brian_ichter/status/1757179635350347800",
  "created_at": "2024-02-12T23:07:26.000Z",
  "#sort_index": "1757179635350347800",
  "view_count": 58011,
  "quote_count": 7,
  "is_quote_tweet": false,
  "is_retweet": false,
  "is_pinned": false,
  "is_truncated": false,
  "startUrl": "https://twitter.com/brian_ichter/status/1757179635350347800"
}