@omarsar0
Multimodal Large Language Models: A Survey Nice graph depicting the evolution from foundational transformer and diffusion structures. All references in the paper: https://t.co/HCbMPkXYn8
Viewing enriched Twitter post
Multimodal Large Language Models: A Survey Nice graph depicting the evolution from foundational transformer and diffusion structures. All references in the paper: https://t.co/HCbMPkXYn8
{
"media": [
{
"type": "image",
"url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1933612480451793179/media_0.jpg?",
"filename": "media_0.jpg"
}
],
"nlp": {
"sentiment": "positive",
"topics": [
"multimodal models",
"language models",
"graph evolution"
],
"entities": [],
"summary": "The tweet appreciates a graph that illustrates the evolution of multimodal large language models.",
"language": "en",
"processed_at": "2025-08-07T11:17:24.949489"
}
} {
"data": {
"threaded_conversation_with_injections_v2": {
"instructions": [
{
"type": "TimelineClearCache"
},
{
"type": "TimelineAddEntries",
"entries": [
{
"entryId": "tweet-1933612478258180576",
"sortIndex": "1953505760885342208",
"content": {
"entryType": "TimelineTimelineItem",
"__typename": "TimelineTimelineItem",
"itemContent": {
"itemType": "TimelineTweet",
"__typename": "TimelineTweet",
"tweet_results": {
"result": {
"__typename": "Tweet",
"rest_id": "1933612478258180576",
"has_birdwatch_notes": false,
"core": {
"user_results": {
"result": {
"__typename": "User",
"id": "VXNlcjozNDQ4Mjg0MzEz",
"rest_id": "3448284313",
"affiliates_highlighted_label": {},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Circle",
"legacy": {
"can_dm": true,
"can_media_tag": true,
"created_at": "Fri Sep 04 12:59:26 +0000 2015",
"default_profile": false,
"default_profile_image": false,
"description": "Building with AI agents @dair_ai • Prev: Meta AI, Galactica LLM, Elastic, PaperswithCode, PhD • I share insights on how to build with AI Agents ⬇️",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "dair-ai.thinkific.com",
"expanded_url": "https://dair-ai.thinkific.com/",
"url": "https://t.co/JBU5beHQNs",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 31675,
"followers_count": 258922,
"friends_count": 643,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 4290,
"location": "",
"media_count": 3767,
"name": "elvis",
"normal_followers_count": 258922,
"pinned_tweet_ids_str": [
"1953152163078635748"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/3448284313/1565974901",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/939313677647282181/vZjFWtAn_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "omarsar0",
"statuses_count": 15303,
"translator_type": "regular",
"url": "https://t.co/JBU5beHQNs",
"verified": false,
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1460724937073770505",
"professional_type": "Creator",
"category": [
{
"id": 713,
"name": "Science & Technology",
"icon_name": ""
}
]
},
"tipjar_settings": {
"is_enabled": false,
"cash_app_handle": "£elvisomarsaravia"
}
}
}
},
"unmention_data": {},
"edit_control": {
"edit_tweet_ids": [
"1933612478258180576"
],
"editable_until_msecs": "1749847714000",
"is_edit_eligible": false,
"edits_remaining": "5"
},
"is_translatable": false,
"views": {
"count": "61430",
"state": "EnabledWithCount"
},
"source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
"legacy": {
"bookmark_count": 529,
"bookmarked": false,
"created_at": "Fri Jun 13 19:48:34 +0000 2025",
"conversation_id_str": "1933612478258180576",
"display_text_range": [
0,
165
],
"entities": {
"hashtags": [],
"media": [
{
"display_url": "pic.x.com/HCbMPkXYn8",
"expanded_url": "https://x.com/omarsar0/status/1933612478258180576/photo/1",
"id_str": "1933611708242694144",
"indices": [
166,
189
],
"media_key": "3_1933611708242694144",
"media_url_https": "https://pbs.twimg.com/media/GtWRmptXkAALmxN.jpg",
"type": "photo",
"url": "https://t.co/HCbMPkXYn8",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": []
},
"medium": {
"faces": []
},
"small": {
"faces": []
},
"orig": {
"faces": []
}
},
"sizes": {
"large": {
"h": 846,
"w": 1296,
"resize": "fit"
},
"medium": {
"h": 783,
"w": 1200,
"resize": "fit"
},
"small": {
"h": 444,
"w": 680,
"resize": "fit"
},
"thumb": {
"h": 150,
"w": 150,
"resize": "crop"
}
},
"original_info": {
"height": 846,
"width": 1296,
"focus_rects": [
{
"x": 0,
"y": 0,
"w": 1296,
"h": 726
},
{
"x": 450,
"y": 0,
"w": 846,
"h": 846
},
{
"x": 554,
"y": 0,
"w": 742,
"h": 846
},
{
"x": 857,
"y": 0,
"w": 423,
"h": 846
},
{
"x": 0,
"y": 0,
"w": 1296,
"h": 846
}
]
},
"allow_download_status": {
"allow_download": true
},
"media_results": {
"result": {
"media_key": "3_1933611708242694144"
}
}
}
],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": []
},
"extended_entities": {
"media": [
{
"display_url": "pic.x.com/HCbMPkXYn8",
"expanded_url": "https://x.com/omarsar0/status/1933612478258180576/photo/1",
"id_str": "1933611708242694144",
"indices": [
166,
189
],
"media_key": "3_1933611708242694144",
"media_url_https": "https://pbs.twimg.com/media/GtWRmptXkAALmxN.jpg",
"type": "photo",
"url": "https://t.co/HCbMPkXYn8",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": []
},
"medium": {
"faces": []
},
"small": {
"faces": []
},
"orig": {
"faces": []
}
},
"sizes": {
"large": {
"h": 846,
"w": 1296,
"resize": "fit"
},
"medium": {
"h": 783,
"w": 1200,
"resize": "fit"
},
"small": {
"h": 444,
"w": 680,
"resize": "fit"
},
"thumb": {
"h": 150,
"w": 150,
"resize": "crop"
}
},
"original_info": {
"height": 846,
"width": 1296,
"focus_rects": [
{
"x": 0,
"y": 0,
"w": 1296,
"h": 726
},
{
"x": 450,
"y": 0,
"w": 846,
"h": 846
},
{
"x": 554,
"y": 0,
"w": 742,
"h": 846
},
{
"x": 857,
"y": 0,
"w": 423,
"h": 846
},
{
"x": 0,
"y": 0,
"w": 1296,
"h": 846
}
]
},
"allow_download_status": {
"allow_download": true
},
"media_results": {
"result": {
"media_key": "3_1933611708242694144"
}
}
}
]
},
"favorite_count": 531,
"favorited": false,
"full_text": "Multimodal Large Language Models: A Survey\n\nNice graph depicting the evolution from foundational transformer and diffusion structures. \n\nAll references in the paper: https://t.co/HCbMPkXYn8",
"is_quote_status": false,
"lang": "en",
"possibly_sensitive": false,
"possibly_sensitive_editable": true,
"quote_count": 4,
"reply_count": 14,
"retweet_count": 96,
"retweeted": false,
"user_id_str": "3448284313",
"id_str": "1933612478258180576"
},
"quick_promote_eligibility": {
"eligibility": "IneligibleNotProfessional"
}
}
},
"tweetDisplayType": "Tweet"
},
"clientEventInfo": {
"component": "tweet",
"element": "tweet"
}
}
},
{
"entryId": "tweet-1933612480451793179",
"sortIndex": "1953505760885342198",
"content": {
"entryType": "TimelineTimelineItem",
"__typename": "TimelineTimelineItem",
"itemContent": {
"itemType": "TimelineTweet",
"__typename": "TimelineTweet",
"tweet_results": {
"result": {
"__typename": "Tweet",
"rest_id": "1933612480451793179",
"has_birdwatch_notes": false,
"core": {
"user_results": {
"result": {
"__typename": "User",
"id": "VXNlcjozNDQ4Mjg0MzEz",
"rest_id": "3448284313",
"affiliates_highlighted_label": {},
"has_graduated_access": true,
"is_blue_verified": true,
"profile_image_shape": "Circle",
"legacy": {
"can_dm": true,
"can_media_tag": true,
"created_at": "Fri Sep 04 12:59:26 +0000 2015",
"default_profile": false,
"default_profile_image": false,
"description": "Building with AI agents @dair_ai • Prev: Meta AI, Galactica LLM, Elastic, PaperswithCode, PhD • I share insights on how to build with AI Agents ⬇️",
"entities": {
"description": {
"urls": []
},
"url": {
"urls": [
{
"display_url": "dair-ai.thinkific.com",
"expanded_url": "https://dair-ai.thinkific.com/",
"url": "https://t.co/JBU5beHQNs",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 31675,
"followers_count": 258922,
"friends_count": 643,
"has_custom_timelines": true,
"is_translator": false,
"listed_count": 4290,
"location": "",
"media_count": 3767,
"name": "elvis",
"normal_followers_count": 258922,
"pinned_tweet_ids_str": [
"1953152163078635748"
],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/3448284313/1565974901",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/939313677647282181/vZjFWtAn_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "omarsar0",
"statuses_count": 15303,
"translator_type": "regular",
"url": "https://t.co/JBU5beHQNs",
"verified": false,
"want_retweets": false,
"withheld_in_countries": []
},
"professional": {
"rest_id": "1460724937073770505",
"professional_type": "Creator",
"category": [
{
"id": 713,
"name": "Science & Technology",
"icon_name": ""
}
]
},
"tipjar_settings": {
"is_enabled": false,
"cash_app_handle": "£elvisomarsaravia"
}
}
}
},
"card": {
"rest_id": "https://t.co/4kf2bS5AyT",
"legacy": {
"binding_values": [
{
"key": "thumbnail_image",
"value": {
"image_value": {
"alt": "arXiv logo",
"height": 144,
"width": 144,
"url": "https://pbs.twimg.com/card_img/1949758743958425600/kRc27OMH?format=jpg&name=144x144_2"
},
"type": "IMAGE"
}
},
{
"key": "description",
"value": {
"string_value": "Multimodal Large Language Models (MLLMs) have rapidly evolved beyond text generation, now spanning diverse output modalities including images, music, video, human motion, and 3D objects, by...",
"type": "STRING"
}
},
{
"key": "domain",
"value": {
"string_value": "arxiv.org",
"type": "STRING"
}
},
{
"key": "thumbnail_image_large",
"value": {
"image_value": {
"alt": "arXiv logo",
"height": 420,
"width": 420,
"url": "https://pbs.twimg.com/card_img/1949758743958425600/kRc27OMH?format=jpg&name=420x420_2"
},
"type": "IMAGE"
}
},
{
"key": "thumbnail_image_original",
"value": {
"image_value": {
"alt": "arXiv logo",
"height": 1000,
"width": 1000,
"url": "https://pbs.twimg.com/card_img/1949758743958425600/kRc27OMH?format=jpg&name=orig"
},
"type": "IMAGE"
}
},
{
"key": "site",
"value": {
"scribe_key": "publisher_id",
"type": "USER",
"user_value": {
"id_str": "808633423300624384",
"path": []
}
}
},
{
"key": "thumbnail_image_small",
"value": {
"image_value": {
"alt": "arXiv logo",
"height": 100,
"width": 100,
"url": "https://pbs.twimg.com/card_img/1949758743958425600/kRc27OMH?format=jpg&name=100x100_2"
},
"type": "IMAGE"
}
},
{
"key": "thumbnail_image_x_large",
"value": {
"image_value": {
"alt": "arXiv logo",
"height": 1000,
"width": 1000,
"url": "https://pbs.twimg.com/card_img/1949758743958425600/kRc27OMH?format=png&name=2048x2048_2_exp"
},
"type": "IMAGE"
}
},
{
"key": "thumbnail_image_alt_text",
"value": {
"string_value": "arXiv logo",
"type": "STRING"
}
},
{
"key": "vanity_url",
"value": {
"scribe_key": "vanity_url",
"string_value": "arxiv.org",
"type": "STRING"
}
},
{
"key": "thumbnail_image_color",
"value": {
"image_color_value": {
"palette": [
{
"rgb": {
"blue": 255,
"green": 255,
"red": 255
},
"percentage": 94.17
},
{
"rgb": {
"blue": 105,
"green": 116,
"red": 124
},
"percentage": 4.33
},
{
"rgb": {
"blue": 46,
"green": 21,
"red": 170
},
"percentage": 1.26
},
{
"rgb": {
"blue": 131,
"green": 116,
"red": 203
},
"percentage": 0.23
}
]
},
"type": "IMAGE_COLOR"
}
},
{
"key": "title",
"value": {
"string_value": "A Survey of Generative Categories and Techniques in Multimodal...",
"type": "STRING"
}
},
{
"key": "card_url",
"value": {
"scribe_key": "card_url",
"string_value": "https://t.co/4kf2bS5AyT",
"type": "STRING"
}
}
],
"card_platform": {
"platform": {
"audience": {
"name": "production"
},
"device": {
"name": "Android",
"version": "12"
}
}
},
"name": "summary",
"url": "https://t.co/4kf2bS5AyT",
"user_refs_results": [
{
"result": {
"__typename": "User",
"id": "VXNlcjo4MDg2MzM0MjMzMDA2MjQzODQ=",
"rest_id": "808633423300624384",
"affiliates_highlighted_label": {},
"has_graduated_access": true,
"is_blue_verified": false,
"profile_image_shape": "Circle",
"legacy": {
"can_dm": true,
"can_media_tag": true,
"created_at": "Tue Dec 13 11:23:26 +0000 2016",
"default_profile": false,
"default_profile_image": false,
"description": "News from https://t.co/enurGFxpcS, a free distribution service and an open archive for scholarly articles.\n\nFor help with arXiv, see https://t.co/LcWuhM0BOl",
"entities": {
"description": {
"urls": [
{
"display_url": "arXiv.org",
"expanded_url": "http://arXiv.org",
"url": "https://t.co/enurGFxpcS",
"indices": [
10,
33
]
},
{
"display_url": "arxiv.org/help",
"expanded_url": "https://arxiv.org/help",
"url": "https://t.co/LcWuhM0BOl",
"indices": [
133,
156
]
}
]
},
"url": {
"urls": [
{
"display_url": "arxiv.org",
"expanded_url": "https://arxiv.org/",
"url": "https://t.co/DHMkdi4lF9",
"indices": [
0,
23
]
}
]
}
},
"fast_followers_count": 0,
"favourites_count": 948,
"followers_count": 41363,
"friends_count": 187,
"has_custom_timelines": false,
"is_translator": false,
"listed_count": 487,
"location": "Ithaca, NY",
"media_count": 109,
"name": "arXiv.org",
"normal_followers_count": 41363,
"pinned_tweet_ids_str": [],
"possibly_sensitive": false,
"profile_banner_url": "https://pbs.twimg.com/profile_banners/808633423300624384/1481635469",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1365352170267299840/IzvjKckL_normal.jpg",
"profile_interstitial_type": "",
"screen_name": "arxiv",
"statuses_count": 1106,
"translator_type": "none",
"url": "https://t.co/DHMkdi4lF9",
"verified": false,
"want_retweets": false,
"withheld_in_countries": []
},
"tipjar_settings": {}
}
}
]
}
},
"unmention_data": {},
"edit_control": {
"edit_tweet_ids": [
"1933612480451793179"
],
"editable_until_msecs": "1749847714000",
"is_edit_eligible": false,
"edits_remaining": "5"
},
"is_translatable": false,
"views": {
"count": "5411",
"state": "EnabledWithCount"
},
"source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
"legacy": {
"bookmark_count": 30,
"bookmarked": false,
"created_at": "Fri Jun 13 19:48:34 +0000 2025",
"conversation_id_str": "1933612478258180576",
"display_text_range": [
0,
23
],
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [
{
"display_url": "arxiv.org/abs/2506.10016",
"expanded_url": "https://arxiv.org/abs/2506.10016",
"url": "https://t.co/4kf2bS5AyT",
"indices": [
0,
23
]
}
],
"user_mentions": []
},
"favorite_count": 34,
"favorited": false,
"full_text": "https://t.co/4kf2bS5AyT",
"in_reply_to_screen_name": "omarsar0",
"in_reply_to_status_id_str": "1933612478258180576",
"in_reply_to_user_id_str": "3448284313",
"is_quote_status": false,
"lang": "zxx",
"possibly_sensitive": false,
"possibly_sensitive_editable": true,
"quote_count": 1,
"reply_count": 0,
"retweet_count": 3,
"retweeted": false,
"user_id_str": "3448284313",
"id_str": "1933612480451793179"
},
"quick_promote_eligibility": {
"eligibility": "IneligibleNotProfessional"
}
}
},
"tweetDisplayType": "SelfThread"
},
"clientEventInfo": {
"component": "tweet",
"element": "tweet"
}
}
},
{
"entryId": "cursor-bottom-1953505760885342197",
"sortIndex": "1953505760885342197",
"content": {
"entryType": "TimelineTimelineCursor",
"__typename": "TimelineTimelineCursor",
"value": "DAAFCgABGxw_I7A___MLAAIAAAAwRW1QQzZ3QUFBZlEvZ0dKTjB2R3AvQUFBQUFJYTFaSk9acGRSR3hyVmtrM2oxM0hnCAADAAAAAgAA",
"cursorType": "Bottom"
}
}
]
},
{
"type": "TimelineTerminateTimeline",
"direction": "Top"
},
{
"type": "TimelineTerminateTimeline",
"direction": "Bottom"
}
],
"metadata": {
"reader_mode_config": {
"is_reader_mode_available": true
},
"scribeConfig": {
"page": "ranked_replies"
}
}
}
}
}