From 4e5db77998c85a5db8d6dd6f61c2ccba91507ba4 Mon Sep 17 00:00:00 2001 From: shwetha729 Date: Mon, 16 Oct 2023 12:42:43 -0400 Subject: [PATCH] Updated: Monday, October 16,2023-10-16 12:42:42 --- .../plugins/recent-files-obsidian/data.json | 8 ++++---- enter/.obsidian/workspace.json | 12 ++++++------ .../GUIs/Internet/Websites/Robots.txt Files.md | 7 +++++-- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/enter/.obsidian/plugins/recent-files-obsidian/data.json b/enter/.obsidian/plugins/recent-files-obsidian/data.json index 1fcf51f..2b619dc 100644 --- a/enter/.obsidian/plugins/recent-files-obsidian/data.json +++ b/enter/.obsidian/plugins/recent-files-obsidian/data.json @@ -1,13 +1,13 @@ { "recentFiles": [ - { - "basename": "Webscraping", - "path": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md" - }, { "basename": "Robots.txt Files", "path": "Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md" }, + { + "basename": "Webscraping", + "path": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md" + }, { "basename": "Potentiometers & Analog SerialReader", "path": "Machine Tips (Quantum)/Physics/Hardware/Potentiometers & Analog SerialReader.md" diff --git a/enter/.obsidian/workspace.json b/enter/.obsidian/workspace.json index 290903e..b80694a 100644 --- a/enter/.obsidian/workspace.json +++ b/enter/.obsidian/workspace.json @@ -25,7 +25,7 @@ "state": { "type": "markdown", "state": { - "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md", + "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md", "mode": "source", "source": false } @@ -107,7 +107,7 @@ "state": { "type": "backlink", "state": { - "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md", + "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md", "collapseAll": false, "extraContext": false, "sortOrder": "alphabetical", @@ -124,7 
+124,7 @@ "state": { "type": "outgoing-link", "state": { - "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md", + "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md", "linksCollapsed": false, "unlinkedCollapsed": true } @@ -147,7 +147,7 @@ "state": { "type": "outline", "state": { - "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md" + "file": "Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md" } } } @@ -174,8 +174,9 @@ "obsidian-excalidraw-plugin:Create new drawing": false } }, - "active": "dbad7b010371d947", + "active": "0a0de85a51848b9d", "lastOpenFiles": [ + "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md", "Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md", "Excalidraw/Drawing 2023-10-16 12.13.42.excalidraw.md", "Machine Tips (Quantum)/Physics/Hardware/Potentiometers & Analog SerialReader.md", @@ -206,7 +207,6 @@ "Untitled.canvas", "Coding Tips (Classical)/Project Vault/Current Occupations/Manhattan Youth", "Coding Tips (Classical)/Project Vault/Current Occupations/Website Projects/My Domain Names.md", - "Coding Tips (Classical)/Project Vault/Current Occupations/Potential and Future/Career Tips.md", "Coding Tips (Classical)/Project Vault/About Obsidian/imgFiles/Pasted image 20231011091043.png", "Coding Tips (Classical)/Project Vault/About Obsidian/Slides & Tools/export/Slides/plugin/chalkboard/_style.css", "Coding Tips (Classical)/Project Vault/About Obsidian/Slides & Tools/export/Slides/plugin/chalkboard/img/blackboard.png", diff --git a/enter/Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md b/enter/Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md index bf81f2c..1aba808 100644 --- a/enter/Coding Tips (Classical)/Terminal Tips/GUIs/Internet/Websites/Robots.txt Files.md +++ b/enter/Coding Tips (Classical)/Terminal 
Tips/GUIs/Internet/Websites/Robots.txt Files.md @@ -1,6 +1,9 @@ -Robots.txt is an increasingly important file found on websites that determine whether you permit a website crawler to index your page for search engine optimization. As webscraping is entirely legal in the US, this is the wild west of scraping and thus I want to keep mu brain and information safe from scraping. +Robots.txt is an increasingly important file found on websites that determines whether you permit a website crawler to index your page for search engine optimization. As web-scraping is entirely legal in the US, this is the wild west of scraping and thus I want to keep my brain and information safe from scraping. +Fun Fact: Google [open-sourced](https://opensource.googleblog.com/2019/07/googles-robotstxt-parser-is-now-open.html) their [robots.txt parser](https://github.com/google/robotstxt) in 2019 if you want to see an example of reverse engineering the robots.txt file for search indexing. *Resources*: -- [Robots.txt file examples](https://blog.hubspot.com/marketing/robots-txt-file) \ No newline at end of file +- [Robots.txt file examples](https://blog.hubspot.com/marketing/robots-txt-file) +- Robots.txt [generator tool](https://www.internetmarketingninjas.com/tools/robots-txt-generator/) +