###################################
# This robots.txt borrows heavily from http://www.YouPosted.com
#
# Bots differ in whether they support wildcards (*), multiple user-agents
# per group and end-anchors ($), so a separate User-agent group is
# provided for each bot that needs special handling.
#
# Some URLs disallowed in the YouPosted robots.txt do not exist here, but
# they are still listed in case of future SMF board enhancements.
#
# NOTE: robots.txt is line-oriented. Every directive must sit on its own
# line, and a "#" comments out everything up to the end of that line.
# Blank lines are used only BETWEEN groups, because some older parsers
# treat a blank line as the end of a group.
###################################

# Our Sitemap - Uses a rewrite rule for the dynamic sitemap generator.
Sitemap: http://www.thebandofgonzos.com/sitemap.xml

# Google - Most important bot
# Unfortunately a robots.txt will only stop it crawling certain URLs; it does
# NOT stop it adding URLs it comes across to its index. So we are relying on
# a meta noindex tag for that.
# A bot that matches a specific group ignores the catch-all (*) group, so
# only the rules below apply to Googlebot.
User-agent: Googlebot
# Don't index mobile versions
Disallow: /community/index.php?*;wap
Disallow: /community/index.php?*;wap2
Disallow: /community/index.php?*;imode

# Yahoo - Too aggressive
# So limit it as much as possible.
User-agent: Slurp
# Disallow everything
Disallow: /
# Now allow bits and then disallow bits
# This will need to be updated if other site software
# is installed in the future (e.g. WordPress)
#Allow: /sitemap.xml$
Allow: /robots.txt$
Allow: /index.php$
Allow: /community/index.php$
Allow: /community/index.php?topic=*.0$
Allow: /community/index.php?topic=*.*0$
Allow: /community/index.php?topic=*.*5$
Allow: /community/index.php?board=*.0$
Allow: /community/index.php?board=*.*0$
Allow: /community/index.php?board=*.*5$
# But don't allow these
Disallow: /community/index.php?*.msg
Disallow: /community/index.php?topic=*.msg*0$
Disallow: /community/index.php?topic=*.msg*5$
Disallow: /community/index.php?*.new
# Disallow anything with a ";"
Disallow: /community/index.php?*;*
# Arcade related - Not implemented here
#Allow: /community/index.php?action=arcade$
#Allow: /community/index.php?action=stats$
#Allow: /community/index.php?action=arcade;sa=play;game=

# Bad bot - Often ignores robots.txt - Waste of bandwidth
# Despite claiming on their website to be a search engine in development,
# I'm suspicious as to whether they are a harvester pretending to be a
# search engine.
User-agent: Twiceler
Disallow: /

User-agent: W3C-checklink
Disallow: /

# Stop following PHPSESSIDs
User-agent: MJ12bot
Disallow: /community/index.php?PHPSESSID

# Catch all (remainder)
# Will be followed by any bots other than the ones identified above.
# Uses BASIC robots.txt directives without wildcards, end-anchors etc.,
# so spiders should understand these (including MSNBOT).
User-agent: *
# Default SMF folders
Disallow: /community/attachments/
Disallow: /community/Packages/
Disallow: /community/Smileys/
Disallow: /community/Sources/
Disallow: /community/Themes/
# Default SMF actions
Disallow: /community/index.php?action=activate
Disallow: /community/index.php?action=admin
Disallow: /community/index.php?action=calendar
Disallow: /community/index.php?action=emailuser
Disallow: /community/index.php?action=findmember
Disallow: /community/index.php?action=help
Disallow: /community/index.php?action=helpadmin
Disallow: /community/index.php?action=login
Disallow: /community/index.php?action=logout
Disallow: /community/index.php?action=mlist
Disallow: /community/index.php?action=modifykarma
Disallow: /community/index.php?action=pm
Disallow: /community/index.php?action=post
Disallow: /community/index.php?action=printpage
Disallow: /community/index.php?action=profile
Disallow: /community/index.php?action=recent
Disallow: /community/index.php?action=register
Disallow: /community/index.php?action=reminder
Disallow: /community/index.php?action=search
Disallow: /community/index.php?action=theme
Disallow: /community/index.php?action=unread
Disallow: /community/index.php?action=unreadreplies
Disallow: /community/index.php?action=verificationcode
Disallow: /community/index.php?action=who
Disallow: /community/index.php?theme
# SMF mod related (a lot of these are not implemented here)
Disallow: /community/archive.php
Disallow: /community/index.php?action=blog
Disallow: /community/index.php?action=viewblog
Disallow: /community/index.php?action=chess
Disallow: /community/index.php?action=comment
Disallow: /community/index.php?action=downloads
Disallow: /community/index.php?action=links
Disallow: /community/index.php?action=reporttm
Disallow: /community/index.php?action=recenttopics
Disallow: /community/index.php?action=mm
Disallow: /community/index.php?action=sitemap
Disallow: /community/index.php?action=staff
Disallow: /community/index.php?action=tags
Disallow: /community/index.php?action=thankyou
Disallow: /community/index.php?action=viewkarma
Disallow: /community/index.php?action=viewers
Disallow: /community/index.php?f=
Disallow: /community/index.php?filter
Disallow: /community/index.php?referredby
Disallow: /community/Games/
Disallow: /community/Downloads/
Disallow: /community/index.php?action=arcade;favorites
Disallow: /community/index.php?action=arcade;sa=highscore
Disallow: /community/index.php?action=arcade;sa=play;random
Disallow: /community/index.php?action=arcade;category
Disallow: /community/index.php?action=arcade;sort
Disallow: /community/index.php?action=arcade;stats
Disallow: /community/index.php?action=stats;expand
Disallow: /community/index.php?action=stats;collapse