# ============================================
# Global Banking & Finance Review
# robots.txt - Optimized for SEO, GEO & AEO
# ============================================
#
# Layout notes:
# - One directive per line: robots.txt is parsed line by line.
# - Every explicitly listed crawler and the "*" wildcard share ONE rule group.
#   A crawler that finds a group naming it ignores the "*" group, so giving
#   each bot its own "Allow: /" group would exempt it from every Disallow below.
# - No blank lines inside the group: some legacy parsers treat a blank line
#   as the end of a record. Bare "#" lines are used as visual spacers instead.
# - Disallow rules come before the Allow rules and "Allow: /" comes last, so
#   parsers that stop at the first matching rule still honour the blocks.
#   Longest-match parsers (RFC 9309) are unaffected by rule order.
#
# ============================================
# AI Search & Generative Engine Crawlers (GEO/AEO)
# ============================================
# OpenAI Crawlers
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: GPTBot
# Anthropic AI Crawlers
User-agent: ClaudeBot
User-agent: anthropic-ai
# Google AI Crawlers
User-agent: Google-Extended
# Perplexity AI Crawlers
User-agent: PerplexityBot
User-agent: Perplexity-User
# X/Twitter AI Crawler
User-agent: GrokBot
# Apple AI Crawlers
User-agent: Applebot
User-agent: Applebot-Extended
# Meta AI Crawlers
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
# Microsoft/Bing AI Crawlers
User-agent: bingbot
User-agent: BingPreview
# Common Crawl (used for AI training datasets)
User-agent: CCBot
# Cohere AI
User-agent: cohere-ai
# Amazon AI
User-agent: Amazonbot
# ByteDance/TikTok AI
User-agent: Bytespider
# You.com AI Search
User-agent: YouBot
# Neeva AI Search
User-agent: NeevaBot
# Brave Search
User-agent: BraveBot
# DuckDuckGo
User-agent: DuckDuckBot
# Mojeek Search
User-agent: MojeekBot
# Yep Search
User-agent: YepBot
# Additional Emerging AI Crawlers
# (facebookexternalhit is listed once, under Social Media Crawlers;
#  user-agent matching is case-insensitive, so a second spelling is redundant)
User-agent: MistralBot
User-agent: Mistral-AI
User-agent: PiBot
User-agent: Inflection-ai
User-agent: Omgilibot
User-agent: ia_archiver
User-agent: Diffbot
User-agent: Baiduspider-render
User-agent: 360Spider
User-agent: Sogou web spider
#
# ============================================
# Traditional Search Engine Crawlers (SEO)
# ============================================
# Google
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: Storebot-Google
User-agent: Google-InspectionTool
# Microsoft/Bing
User-agent: msnbot
User-agent: msnbot-media
# Yahoo
User-agent: Slurp
# Yandex
User-agent: YandexBot
User-agent: YandexImages
User-agent: YandexNews
# Baidu
User-agent: Baiduspider
User-agent: Baiduspider-image
# Sogou
User-agent: Sogou
# Naver (Korean search)
User-agent: Yeti
# Seznam (Czech search)
User-agent: SeznamBot
# Qwant (EU search)
User-agent: Qwantify
#
# ============================================
# Social Media Crawlers (for rich previews)
# ============================================
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: Pinterest
User-agent: Slackbot
User-agent: TelegramBot
User-agent: WhatsApp
User-agent: Discordbot
#
# ============================================
# SEO & Analytics Tools
# ============================================
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: rogerbot
User-agent: Screaming Frog SEO Spider
User-agent: archive.org_bot
User-agent: MauiBot
User-agent: BLEXBot
User-agent: DataForSeoBot
User-agent: MegaIndex
User-agent: SiteAuditBot
User-agent: Sitebulb
User-agent: Lighthouse
User-agent: PageSpeed Insights
#
# ============================================
# All Other Crawlers (same rules as the crawlers listed above)
# ============================================
User-agent: *
#
# ============================================
# Blocked Areas (apply to every crawler in this group)
# ============================================
# Sensitive/private areas
Disallow: /admin/
Disallow: /api/draft-mode/
Disallow: /api/auth/
Disallow: /api/revalidate/
Disallow: /api/google-indexing/
Disallow: /api/sendEmail/
Disallow: /api/verify-captcha/
# Internal API endpoints (not needed for indexing)
Disallow: /api/categories/
Disallow: /api/tags/
Disallow: /api/navigation
Disallow: /api/company
# Search results pages (thin content, duplicate)
Disallow: /search?
Disallow: /search/?
# Sanity Studio
Disallow: /studio/
# Error page chunks (more specific than the /_next/static/ Allow below,
# so they stay blocked under both first-match and longest-match evaluation)
Disallow: /_next/static/chunks/pages/_error
Disallow: /_next/static/chunks/pages/404
Disallow: /_next/static/chunks/pages/500
#
# NOTE(review): under longest-match evaluation a wildcard Allow that is as long
# as or longer than a Disallow prefix takes precedence (e.g. "/*.jsonld" is
# longer than "/studio/"). Confirm no such files live under the blocked paths.
#
# ============================================
# Next.js Assets, Fonts & Images (Local & Proxied CDN)
# ============================================
Allow: /_next/static/
Allow: /_next/static/*.json
Allow: /_next/data/
# Next.js Image Optimization (proxies Sanity & Cloudflare images).
# Rules are prefix matches, so this also covers /_next/image?... URLs.
Allow: /_next/image
Allow: /fonts/
# Local images directories
Allow: /images/
Allow: /public/images/
# All image file extensions
Allow: /*.png
Allow: /*.jpg
Allow: /*.jpeg
Allow: /*.gif
Allow: /*.webp
Allow: /*.avif
Allow: /*.svg
Allow: /*.ico
Allow: /*.bmp
Allow: /*.tiff
# Note: External CDN images (cdn.sanity.io, imagedelivery.net) are served
# from their own domains and are not controlled by this robots.txt.
# Those domains are assumed to allow all crawlers - verify against their
# own robots.txt files.
#
# ============================================
# Required JSON, Config & Feed Files
# ============================================
# PWA & Browser Config
Allow: /manifest.json
Allow: /browserconfig.xml
Allow: /site.webmanifest
# RSS/Atom/JSON Feeds
Allow: /feed/
Allow: /*.rss
Allow: /*.atom
Allow: /rss.xml
Allow: /atom.xml
Allow: /feed.json
# Structured Data & Schema
Allow: /schema.json
Allow: /*.jsonld
# ads.txt for advertising
Allow: /ads.txt
# LLMs.txt for AI crawlers
Allow: /llms.txt
#
# ============================================
# Sitemaps (XML & HTML)
# ============================================
# Any URL ending in .xml
Allow: /*.xml$
# XML Sitemap Index
Allow: /sitemap.xml
Allow: /sitemaps/sitemap-index.xml
# Static Sitemap Directory (prefix match covers everything beneath it)
Allow: /sitemaps/
# Static Sitemaps (explicit entries kept from the original file)
Allow: /sitemaps/sitemap-static.xml
Allow: /sitemaps/sitemap-categories.xml
Allow: /sitemaps/sitemap-tags.xml
Allow: /sitemaps/sitemap-authors.xml
Allow: /sitemaps/sitemap-posts-
# News Sitemaps
Allow: /sitemap-news.xml
Allow: /sitemap-news/
# Image Sitemaps
Allow: /sitemap-image.xml
Allow: /sitemap-image/
# Paginated Post Sitemaps (legacy path if used)
Allow: /sitemap/
# HTML Sitemaps (for users and SEO discovery)
Allow: /html-sitemap/
#
# Allow everything else. Kept LAST so first-match parsers reach the Disallow
# rules above before this catch-all.
Allow: /

# ============================================
# Sitemaps Declaration
# ============================================
# sitemap.xml is the main sitemap index and references the sub-sitemaps.
# The additional Sitemap lines are listed for redundancy; multiple Sitemap
# lines are permitted and apply regardless of user-agent group.
Sitemap: https://www.globalbankingandfinance.com/sitemap.xml
Sitemap: https://www.globalbankingandfinance.com/sitemap-news.xml
Sitemap: https://www.globalbankingandfinance.com/sitemap-image.xml
Sitemap: https://www.globalbankingandfinance.com/sitemaps/sitemap-static.xml

# ============================================
# LLMs.txt (AI-readable site information)
# ============================================
# For AI assistants and LLMs to understand site structure
# See: https://www.globalbankingandfinance.com/llms.txt