+ Antworten
Ergebnis 1 bis 2 von 2

Thema: Spider-Useragenten Liste

  1. #1
    TP-Member Jenny18 macht alles soweit korrekt
    Registriert seit
    Sep 2001
    Beiträge
    59

    Question Spider-Useragenten Liste

    hi,

    Weiß jemand, ob es irgendwo eine Liste der Useragenten gibt, über die man einen Spider identifizieren kann (wie z. B. bei Google 'Googlebot/2.1')?

    Jenny

  2. #2
    TP-Insider #!/usr/bin/beer bringt sich richtig ein #!/usr/bin/beer bringt sich richtig ein Avatar von #!/usr/bin/beer
    Registriert seit
    Jan 2002
    Ort
    Akhragan
    Beiträge
    791
    hier:
    http://www.jafsoft.com/searchengines/webbots.html
    und hier:
    http://www.robotstxt.org/wc/active/all.txt

    und noch die hier als Hash aus einem Perl-Script:

    # %RobotsHashIDLib
    # Lookup table of known web robots: robot id => human-readable robot name.
    #
    # All ids are written in lower case. A few ids contain regex syntax
    # (e.g. "calif[^r]", "[^a]fish"), so they are presumably used as patterns
    # against the User-Agent string -- TODO confirm against the calling code.
    #
    # NOTE on escapes: every string is double-quoted, so Perl resolves the
    # backslashes at compile time. "voyager\/" is stored as voyager/,
    # "webclipping\.com" as webclipping.com, and "w\@pSpider" as w@pSpider
    # (the \@ is needed to stop array interpolation).

    %RobotsHashIDLib = (
    "acme.spider" => "Acme.Spider",
    "ahoythehomepagefinder" => "Ahoy! The Homepage Finder",
    "alkaline" => "Alkaline",
    "appie" => "Walhello appie",
    "arachnophilia" => "Arachnophilia",
    "architext" => "ArchitextSpider",
    "aretha" => "Aretha",
    "ariadne" => "ARIADNE",
    "arks" => "arks",
    "aspider" => "ASpider (Associative Spider)",
    "atn.txt" => "ATN Worldwide",
    "atomz" => "Atomz.com Search Robot",
    "auresys" => "AURESYS",
    "backrub" => "BackRub",
    "bigbrother" => "Big Brother",
    "bjaaland" => "Bjaaland",
    "blackwidow" => "BlackWidow",
    "blindekuh" => "Die Blinde Kuh",
    "bloodhound" => "Bloodhound",
    "brightnet" => "bright.net caching robot",
    "bspider" => "BSpider",
    "cactvschemistryspider" => "CACTVS Chemistry Spider",
    "calif[^r]" => "Calif",
    "cassandra" => "Cassandra",
    "cgireader" => "Digimarc Marcspider/CGI",
    "checkbot" => "Checkbot",
    "churl" => "churl",
    "cmc" => "CMC/0.01",
    "collective" => "Collective",
    "combine" => "Combine System",
    "conceptbot" => "Conceptbot",
    "coolbot" => "CoolBot",
    "core" => "Web Core / Roots",
    "cosmos" => "XYLEME Robot",
    "cruiser" => "Internet Cruiser Robot",
    "cusco" => "Cusco",
    "cyberspyder" => "CyberSpyder Link Test",
    "deweb" => "DeWeb(c) Katalog/Index",
    "dienstspider" => "DienstSpider",
    "digger" => "Digger",
    "diibot" => "Digital Integrity Robot",
    "directhit" => "Direct Hit Grabber",
    "dnabot" => "DNAbot",
    "download_express" => "DownLoad Express",
    "dragonbot" => "DragonBot",
    "dwcp" => "DWCP (Dridus' Web Cataloging Project)",
    "e-collector" => "e-collector",
    "ebiness" => "EbiNess",
    "eit" => "EIT Link Verifier Robot",
    "elfinbot" => "ELFINBOT",
    "emacs" => "Emacs-w3 Search Engine",
    "emcspider" => "ananzi",
    "esther" => "Esther",
    "evliyacelebi" => "Evliya Celebi",
    "nzexplorer" => "nzexplorer",
    "fdse" => "Fluid Dynamics Search Engine robot",
    "felix" => "Felix IDE",
    "ferret" => "Wild Ferret Web Hopper #1, #2, #3",
    "fetchrover" => "FetchRover",
    "fido" => "fido",
    "finnish" => "Hämähäkki",
    "fireball" => "KIT-Fireball",
    "[^a]fish" => "Fish search",
    "fouineur" => "Fouineur",
    "francoroute" => "Robot Francoroute",
    "freecrawl" => "Freecrawl",
    "funnelweb" => "FunnelWeb",
    "gama" => "gammaSpider, FocusedCrawler",
    "gazz" => "gazz",
    "gcreep" => "GCreep",
    "getbot" => "GetBot",
    "geturl" => "GetURL",
    "golem" => "Golem",
    "googlebot" => "Googlebot (Google)",
    "grapnel" => "Grapnel/0.01 Experiment",
    "griffon" => "Griffon",
    "gromit" => "Gromit",
    "gulliver" => "Northern Light Gulliver",
    "hambot" => "HamBot",
    "harvest" => "Harvest",
    "havindex" => "havIndex",
    "hometown" => "Hometown Spider Pro",
    "htdig" => "ht://Dig",
    "htmlgobble" => "HTMLgobble",
    "hyperdecontextualizer" => "Hyper-Decontextualizer",
    "iajabot" => "iajaBot",
    "ibm" => "IBM_Planetwide",
    "iconoclast" => "Popular Iconoclast",
    "ilse" => "Ingrid",
    "imagelock" => "Imagelock",
    "incywincy" => "IncyWincy",
    "informant" => "Informant",
    "infoseek" => "InfoSeek Robot 1.0",
    "infoseeksidewinder" => "Infoseek Sidewinder",
    "infospider" => "InfoSpiders",
    "inspectorwww" => "Inspector Web",
    "intelliagent" => "IntelliAgent",
    "irobot" => "I, Robot",
    "iron33" => "Iron33",
    "israelisearch" => "Israeli-search",
    "javabee" => "JavaBee",
    "jbot" => "JBot Java Web Robot",
    "jcrawler" => "JCrawler",
    "jeeves" => "Jeeves",
    "jobo" => "JoBo Java Web Robot",
    "jobot" => "Jobot",
    "joebot" => "JoeBot",
    "jubii" => "The Jubii Indexing Robot",
    "jumpstation" => "JumpStation",
    "katipo" => "Katipo",
    "kdd" => "KDD-Explorer",
    "kilroy" => "Kilroy",
    "ko_yappo_robot" => "KO_Yappo_Robot",
    "labelgrabber.txt" => "LabelGrabber",
    "larbin" => "larbin",
    "legs" => "legs",
    "linkidator" => "Link Validator",
    "linkscan" => "LinkScan",
    "linkwalker" => "LinkWalker",
    "lockon" => "Lockon",
    "logo_gif" => "logo.gif Crawler",
    "lycos" => "Lycos",
    "macworm" => "Mac WWWWorm",
    "magpie" => "Magpie",
    "marvin" => "marvin/infoseek",
    "mattie" => "Mattie",
    "mediafox" => "MediaFox",
    "merzscope" => "MerzScope",
    "meshexplorer" => "NEC-MeshExplorer",
    "mindcrawler" => "MindCrawler",
    "moget" => "moget",
    "momspider" => "MOMspider",
    "monster" => "Monster",
    "motor" => "Motor",
    "muscatferret" => "Muscat Ferret",
    "mwdsearch" => "Mwd.Search",
    "myweb" => "Internet Shinchakubin",
    "netcarta" => "NetCarta WebMap Engine",
    "netcraft" => "Netcraft Web Server Survey",
    "netmechanic" => "NetMechanic",
    "netscoop" => "NetScoop",
    "newscan-online" => "newscan-online",
    "nhse" => "NHSE Web Forager",
    "nomad" => "Nomad",
    "northstar" => "The NorthStar Robot",
    "occam" => "Occam",
    "octopus" => "HKU WWW Octopus",
    "openfind" => "Openfind data gatherer",
    "orb_search" => "Orb Search",
    "packrat" => "Pack Rat",
    "pageboy" => "PageBoy",
    "parasite" => "ParaSite",
    "patric" => "Patric",
    "pegasus" => "pegasus",
    "perignator" => "The Peregrinator",
    "perlcrawler" => "PerlCrawler 1.0",
    "phantom" => "Phantom",
    "piltdownman" => "PiltdownMan",
    "pimptrain" => "Pimptrain.com's robot",
    "pioneer" => "Pioneer",
    "pitkow" => "html_analyzer",
    "pjspider" => "Portal Juice Spider",
    "pka" => "PGP Key Agent",
    "plumtreewebaccessor" => "PlumtreeWebAccessor",
    "poppi" => "Poppi",
    "portalb" => "PortalB Spider",
    "puu" => "GetterroboPlus Puu",
    "python" => "The Python Robot",
    "raven" => "Raven Search",
    "rbse" => "RBSE Spider",
    "resumerobot" => "Resume Robot",
    "rhcs" => "RoadHouse Crawling System",
    "road_runner" => "Road Runner: The ImageScape Robot",
    "robbie" => "Robbie the Robot",
    "robi" => "ComputingSite Robi/1.0",
    "robofox" => "RoboFox",
    "robozilla" => "Robozilla",
    "roverbot" => "Roverbot",
    "rules" => "RuLeS",
    "safetynetrobot" => "SafetyNet Robot",
    "scooter" => "Scooter (AltaVista)",
    "search_au" => "Search.Aus-AU.COM",
    "searchprocess" => "SearchProcess",
    "senrigan" => "Senrigan",
    "sgscout" => "SG-Scout",
    "shaggy" => "ShagSeeker",
    "shaihulud" => "Shai'Hulud",
    "sift" => "Sift",
    "simbot" => "Simmany Robot Ver1.0",
    "site-valet" => "Site Valet",
    "sitegrabber" => "Open Text Index Robot",
    "sitetech" => "SiteTech-Rover",
    "slcrawler" => "SLCrawler",
    "slurp" => "Inktomi Slurp",
    "smartspider" => "Smart Spider",
    "snooper" => "Snooper",
    "solbot" => "Solbot",
    "spanner" => "Spanner",
    "speedy" => "Speedy Spider",
    "spider_monkey" => "spider_monkey",
    "spiderbot" => "SpiderBot",
    "spiderline" => "Spiderline Crawler",
    "spiderman" => "SpiderMan",
    "spiderview" => "SpiderView(tm)",
    "spry" => "Spry Wizard Robot",
    "ssearcher" => "Site Searcher",
    "suke" => "Suke",
    "suntek" => "suntek search engine",
    "sven" => "Sven",
    "tach_bw" => "TACH Black Widow",
    "tarantula" => "Tarantula",
    "tarspider" => "tarspider",
    # Disabled entry (left commented out, as in the original list):
    #"tcl" => "Tcl W3 Robot",
    "techbot" => "TechBOT",
    "templeton" => "Templeton",
    "teoma_agent1" => "TeomaTechnologies",
    "titin" => "TitIn",
    "titan" => "TITAN",
    "tkwww" => "The TkWWW Robot",
    "tlspider" => "TLSpider",
    "ucsd" => "UCSD Crawl",
    "udmsearch" => "UdmSearch",
    "urlck" => "URL Check",
    "valkyrie" => "Valkyrie",
    "verticrawl" => "Verticrawl",
    "victoria" => "Victoria",
    "visionsearch" => "vision-search",
    "voyager\/" => "Voyager",
    "vwbot" => "VWbot",
    "w3index" => "The NWI Robot",
    "w3m2" => "W3M2",
    "wallpaper" => "WallPaper",
    "wanderer" => "the World Wide Web Wanderer",
    "wapspider" => "w\@pSpider by wap4.com",
    "webbandit" => "WebBandit Web Spider",
    "webcatcher" => "WebCatcher",
    "webcopy" => "WebCopy",
    "webfetcher" => "Webfetcher",
    "webfoot" => "The Webfoot Robot",
    "weblayers" => "Weblayers",
    "weblinker" => "WebLinker",
    "webmirror" => "WebMirror",
    "webmoose" => "The Web Moose",
    "webquest" => "WebQuest",
    "webreader" => "Digimarc MarcSpider",
    "webreaper" => "WebReaper",
    # Disabled entry (left commented out, as in the original list):
    #"webs" => "Webs",
    "websnarf" => "Websnarf",
    "webspider" => "WebSpider",
    "webvac" => "WebVac",
    "webwalk" => "webwalk",
    "webwalker" => "WebWalker",
    "webwatch" => "WebWatch",
    "wget" => "Wget",
    "whatuseek" => "whatUseek Winona",
    "whowhere" => "WhoWhere Robot",
    "wired-digital" => "Wired Digital",
    "wmir" => "w3mir",
    "wolp" => "WebStolperer",
    "wombat" => "The Web Wombat",
    "worm" => "The World Wide Web Worm",
    "wwwc" => "WWWC Ver 0.2.5",
    "wz101" => "WebZinger",
    "xget" => "XGET",
    "nederland.zoek" => "Nederland.zoek",

    # Additional robots contributed by users
    "antibot" => "Antibot",
    "aport" => "Aport",
    "awbot" => "AWBot",
    "baiduspider" => "BaiDuSpider",
    "bobby" => "Bobby",
    "boris" => "Boris",
    "bumblebee" => "Bumblebee (relevare.com)",
    "cscrawler" => "CsCrawler",
    "daviesbot" => "DaviesBot",
    "digout4u" => "Digout4u",
    "echo" => "EchO!",
    "exactseek" => "ExactSeek Crawler",
    "ezresult" => "Ezresult",
    "fast-webcrawler" => "Fast-Webcrawler (AllTheWeb)",
    "gigabot" => "GigaBot",
    "gnodspider" => "GNOD Spider",
    "henrythemiragorobot" => "Mirago",
    "ia_archiver" => "Alexa (IA Archiver)",
    "internetseer" => "InternetSeer",
    "jennybot" => "JennyBot",
    "justview" => "JustView",
    "linkbot" => "LinkBot",
    "linkchecker" => "LinkChecker",
    "mercator" => "Mercator",
    "msiecrawler" => "MSIECrawler",
    "msnbot" => "MSNBot",
    "perman" => "Perman surfer",
    "petersnews" => "Petersnews",
    "pompos" => "Pompos",
    "psbot" => "psBot",
    "rambler" => "StackRambler",
    "redalert" => "Red Alert",
    "shoutcast" => "Shoutcast Directory Service",
    "slysearch" => "SlySearch",
    "surveybot" => "SurveyBot",
    "turnitinbot" => "Turn It In",
    "turtle" => "Turtle",
    "turtlescanner" => "Turtle",
    "ultraseek" => "Ultraseek",
    "unlost_web_crawler" => "Unlost Web Crawler",
    "voila" => "Voila",
    "webbase" => "WebBase",
    "webcompass" => "webcompass",
    "webclipping\.com" => "WebClipping.com",
    "wisenutbot" => "WISENutbot (Looksmart)",
    "yandex" => "Yandex bot",
    "zealbot" => "ZealBot",
    "zyborg" => "Zyborg (Looksmart)",

    # Generic fallback ids: very broad substrings for otherwise unknown robots
    "robot" => "Unknown robot (identified by 'robot')",
    "crawl" => "Unknown robot (identified by 'crawl')",
    "spider" => "Unknown robot (identified by 'spider')"
    );
    میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی.
    நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது

+ Antworten

Aktive Benutzer

Aktive Benutzer

Aktive Benutzer in diesem Thema: 1 (Registrierte Benutzer: 0, Gäste: 1)

     

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51