🐦 Twitter Post Details

Viewing enriched Twitter post

@_akhaliq

Improving Multimodal Datasets with Image Captioning paper page: https://t.co/3vqcDT47pb Massive web datasets play a key role in the success of large vision-language models like CLIP and Flamingo. However, the raw web data is noisy, and existing filtering methods to reduce noise… https://t.co/UBg8wqSdhy https://t.co/h1xpF6oyhG

View on Twitter

📊 Media Metadata

{
  "media": [
    {
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1682236761869565952/media_0.jpg",
      "type": "photo",
      "original_url": "http://pbs.twimg.com/media/F1iBaGTWwAAawPD.jpg",
      "download_date": "2025-08-13T05:52:08.448045",
      "stored_in_supabase": true,
      "format_converted_from_list": true
    }
  ],
  "conversion_date": "2025-08-13T00:32:29.660592",
  "format_converted": true,
  "original_structure": "had_media_only"
}

🔧 Raw API Response

{
  "created_at": "Fri Jul 21 03:51:13 +0000 2023",
  "id": 1682236761869565952,
  "id_str": "1682236761869565952",
  "full_text": "Improving Multimodal Datasets with Image Captioning\n\npaper page: https://t.co/3vqcDT47pb\n\nMassive web datasets play a key role in the success of large vision-language models like CLIP and Flamingo. However, the raw web data is noisy, and existing filtering methods to reduce noise… https://t.co/UBg8wqSdhy https://t.co/h1xpF6oyhG",
  "truncated": false,
  "display_text_range": [
    0,
    305
  ],
  "entities": {
    "hashtags": [],
    "symbols": [],
    "user_mentions": [],
    "urls": [
      {
        "url": "https://t.co/3vqcDT47pb",
        "expanded_url": "https://huggingface.co/papers/2307.10350",
        "display_url": "huggingface.co/papers/2307.10…",
        "indices": [
          65,
          88
        ]
      },
      {
        "url": "https://t.co/UBg8wqSdhy",
        "expanded_url": "https://twitter.com/i/web/status/1682236761869565952",
        "display_url": "twitter.com/i/web/status/1…",
        "indices": [
          282,
          305
        ]
      }
    ],
    "media": [
      {
        "id": 1682236746191257600,
        "id_str": "1682236746191257600",
        "indices": [
          306,
          329
        ],
        "media_url": "http://pbs.twimg.com/media/F1iBaGTWwAAawPD.jpg",
        "media_url_https": "https://pbs.twimg.com/media/F1iBaGTWwAAawPD.jpg",
        "url": "https://t.co/h1xpF6oyhG",
        "display_url": "pic.twitter.com/h1xpF6oyhG",
        "expanded_url": "https://twitter.com/_akhaliq/status/1682236761869565952/photo/1",
        "type": "photo",
        "sizes": {
          "thumb": {
            "w": 150,
            "h": 150,
            "resize": "crop"
          },
          "large": {
            "w": 822,
            "h": 998,
            "resize": "fit"
          },
          "medium": {
            "w": 822,
            "h": 998,
            "resize": "fit"
          },
          "small": {
            "w": 560,
            "h": 680,
            "resize": "fit"
          }
        }
      }
    ]
  },
  "extended_entities": {
    "media": [
      {
        "id": 1682236746191257600,
        "id_str": "1682236746191257600",
        "indices": [
          306,
          329
        ],
        "media_url": "http://pbs.twimg.com/media/F1iBaGTWwAAawPD.jpg",
        "media_url_https": "https://pbs.twimg.com/media/F1iBaGTWwAAawPD.jpg",
        "url": "https://t.co/h1xpF6oyhG",
        "display_url": "pic.twitter.com/h1xpF6oyhG",
        "expanded_url": "https://twitter.com/_akhaliq/status/1682236761869565952/photo/1",
        "type": "photo",
        "sizes": {
          "thumb": {
            "w": 150,
            "h": 150,
            "resize": "crop"
          },
          "large": {
            "w": 822,
            "h": 998,
            "resize": "fit"
          },
          "medium": {
            "w": 822,
            "h": 998,
            "resize": "fit"
          },
          "small": {
            "w": 560,
            "h": 680,
            "resize": "fit"
          }
        }
      }
    ]
  },
  "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
  "in_reply_to_status_id": null,
  "in_reply_to_status_id_str": null,
  "in_reply_to_user_id": null,
  "in_reply_to_user_id_str": null,
  "in_reply_to_screen_name": null,
  "user": {
    "id": 2465283662,
    "id_str": "2465283662",
    "name": "AK",
    "screen_name": "_akhaliq",
    "location": "subscribe → ",
    "description": "AI research paper tweets, ML @Gradio (acq. by @HuggingFace 🤗)\n\ndm for promo",
    "url": "https://t.co/TbGnXZJwEc",
    "entities": {
      "url": {
        "urls": [
          {
            "url": "https://t.co/TbGnXZJwEc",
            "expanded_url": "https://akhaliq.substack.com/",
            "display_url": "akhaliq.substack.com",
            "indices": [
              0,
              23
            ]
          }
        ]
      },
      "description": {
        "urls": []
      }
    },
    "protected": false,
    "followers_count": 218436,
    "friends_count": 1835,
    "listed_count": 2907,
    "created_at": "Sun Apr 27 00:20:12 +0000 2014",
    "favourites_count": 25556,
    "utc_offset": null,
    "time_zone": null,
    "geo_enabled": false,
    "verified": false,
    "statuses_count": 20374,
    "lang": null,
    "contributors_enabled": false,
    "is_translator": false,
    "is_translation_enabled": false,
    "profile_background_color": "C0DEED",
    "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
    "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
    "profile_background_tile": false,
    "profile_image_url": "http://pbs.twimg.com/profile_images/1451191636810092553/kpM5Fe12_normal.jpg",
    "profile_image_url_https": "https://pbs.twimg.com/profile_images/1451191636810092553/kpM5Fe12_normal.jpg",
    "profile_banner_url": "https://pbs.twimg.com/profile_banners/2465283662/1610997549",
    "profile_link_color": "1DA1F2",
    "profile_sidebar_border_color": "C0DEED",
    "profile_sidebar_fill_color": "DDEEF6",
    "profile_text_color": "333333",
    "profile_use_background_image": true,
    "has_extended_profile": true,
    "default_profile": true,
    "default_profile_image": false,
    "following": true,
    "follow_request_sent": false,
    "notifications": false,
    "translator_type": "none",
    "withheld_in_countries": []
  },
  "geo": null,
  "coordinates": null,
  "place": null,
  "contributors": null,
  "is_quote_status": false,
  "retweet_count": 26,
  "favorite_count": 141,
  "favorited": false,
  "retweeted": false,
  "possibly_sensitive": false,
  "possibly_sensitive_appealable": false,
  "lang": "en"
}