Updated: Monday, October 16, 2023 12:27:27
parent
f78156d0d2
commit
422a0f2f94
|
@ -1,5 +1,13 @@
|
||||||
{
|
{
|
||||||
"recentFiles": [
|
"recentFiles": [
|
||||||
|
{
|
||||||
|
"basename": "Webscraping",
|
||||||
|
"path": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"basename": "Robots.txt Files",
|
||||||
|
"path": "Robots.txt Files.md"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"basename": "Potentiometers & Analog SerialReader",
|
"basename": "Potentiometers & Analog SerialReader",
|
||||||
"path": "Machine Tips (Quantum)/Physics/Hardware/Potentiometers & Analog SerialReader.md"
|
"path": "Machine Tips (Quantum)/Physics/Hardware/Potentiometers & Analog SerialReader.md"
|
||||||
|
@ -46,7 +54,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"basename": "Fire",
|
"basename": "Fire",
|
||||||
"path": "Coding Tips (Classical)/Terminal Tips/Fire.md"
|
"path": "Coding Tips (Classical)/Terminal Tips/Languages/Python/Fire.md"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"basename": "Choosing a Name for Your Computer",
|
"basename": "Choosing a Name for Your Computer",
|
||||||
|
|
|
@ -18,8 +18,21 @@
|
||||||
"source": false
|
"source": false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "0a0de85a51848b9d",
|
||||||
|
"type": "leaf",
|
||||||
|
"state": {
|
||||||
|
"type": "markdown",
|
||||||
|
"state": {
|
||||||
|
"file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md",
|
||||||
|
"mode": "source",
|
||||||
|
"source": false
|
||||||
}
|
}
|
||||||
]
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"currentTab": 1
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"direction": "vertical"
|
"direction": "vertical"
|
||||||
|
@ -78,7 +91,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"direction": "horizontal",
|
"direction": "horizontal",
|
||||||
"width": 200
|
"width": 323.5
|
||||||
},
|
},
|
||||||
"right": {
|
"right": {
|
||||||
"id": "4bd9c02fbfe6785f",
|
"id": "4bd9c02fbfe6785f",
|
||||||
|
@ -94,6 +107,7 @@
|
||||||
"state": {
|
"state": {
|
||||||
"type": "backlink",
|
"type": "backlink",
|
||||||
"state": {
|
"state": {
|
||||||
|
"file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md",
|
||||||
"collapseAll": false,
|
"collapseAll": false,
|
||||||
"extraContext": false,
|
"extraContext": false,
|
||||||
"sortOrder": "alphabetical",
|
"sortOrder": "alphabetical",
|
||||||
|
@ -110,6 +124,7 @@
|
||||||
"state": {
|
"state": {
|
||||||
"type": "outgoing-link",
|
"type": "outgoing-link",
|
||||||
"state": {
|
"state": {
|
||||||
|
"file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md",
|
||||||
"linksCollapsed": false,
|
"linksCollapsed": false,
|
||||||
"unlinkedCollapsed": true
|
"unlinkedCollapsed": true
|
||||||
}
|
}
|
||||||
|
@ -132,7 +147,7 @@
|
||||||
"state": {
|
"state": {
|
||||||
"type": "outline",
|
"type": "outline",
|
||||||
"state": {
|
"state": {
|
||||||
"file": "Excalidraw/Drawing 2023-10-16 12.13.42.excalidraw.md"
|
"file": "Coding Tips (Classical)/Terminal Tips/GUIs/Tools/Webscraping.md"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -161,8 +176,9 @@
|
||||||
},
|
},
|
||||||
"active": "dbad7b010371d947",
|
"active": "dbad7b010371d947",
|
||||||
"lastOpenFiles": [
|
"lastOpenFiles": [
|
||||||
"Machine Tips (Quantum)/Physics/Hardware/Potentiometers & Analog SerialReader.md",
|
"Robots.txt Files.md",
|
||||||
"Excalidraw/Drawing 2023-10-16 12.13.42.excalidraw.md",
|
"Excalidraw/Drawing 2023-10-16 12.13.42.excalidraw.md",
|
||||||
|
"Machine Tips (Quantum)/Physics/Hardware/Potentiometers & Analog SerialReader.md",
|
||||||
"Excalidraw",
|
"Excalidraw",
|
||||||
"Machine Tips (Quantum)/Physics/Optics.md",
|
"Machine Tips (Quantum)/Physics/Optics.md",
|
||||||
"Machine Tips (Quantum)/Physics/Hardware/Laser Cutting.md",
|
"Machine Tips (Quantum)/Physics/Hardware/Laser Cutting.md",
|
||||||
|
@ -174,7 +190,7 @@
|
||||||
"Machine Tips (Quantum)/Math/Proof of a formula for modulo.md",
|
"Machine Tips (Quantum)/Math/Proof of a formula for modulo.md",
|
||||||
"Machine Tips (Quantum)/Math/Quantum Formalism.md",
|
"Machine Tips (Quantum)/Math/Quantum Formalism.md",
|
||||||
"Machine Tips (Quantum)/Math/Visualizing the Quantum Space.md",
|
"Machine Tips (Quantum)/Math/Visualizing the Quantum Space.md",
|
||||||
"Coding Tips (Classical)/Terminal Tips/Fire.md",
|
"Coding Tips (Classical)/Terminal Tips/Languages/Python/Fire.md",
|
||||||
"Coding Tips (Classical)/Terminal Tips/Computers/Choosing a Name for Your Computer.md",
|
"Coding Tips (Classical)/Terminal Tips/Computers/Choosing a Name for Your Computer.md",
|
||||||
"Coding Tips (Classical)/Terminal Tips/CLI Tools/Terminal Emulators.md",
|
"Coding Tips (Classical)/Terminal Tips/CLI Tools/Terminal Emulators.md",
|
||||||
"Coding Tips (Classical)/Terminal Tips/CLI Tools/fzf.md",
|
"Coding Tips (Classical)/Terminal Tips/CLI Tools/fzf.md",
|
||||||
|
@ -191,7 +207,6 @@
|
||||||
"Coding Tips (Classical)/Project Vault/Current Occupations/Manhattan Youth",
|
"Coding Tips (Classical)/Project Vault/Current Occupations/Manhattan Youth",
|
||||||
"Coding Tips (Classical)/Project Vault/Current Occupations/Website Projects/My Domain Names.md",
|
"Coding Tips (Classical)/Project Vault/Current Occupations/Website Projects/My Domain Names.md",
|
||||||
"Coding Tips (Classical)/Project Vault/Current Occupations/Potential and Future/Career Tips.md",
|
"Coding Tips (Classical)/Project Vault/Current Occupations/Potential and Future/Career Tips.md",
|
||||||
"Coding Tips (Classical)/Project Vault/Current Occupations/Potential and Future/60 careers/Academic Tutor.md",
|
|
||||||
"Coding Tips (Classical)/Project Vault/About Obsidian/imgFiles/Pasted image 20231011091043.png",
|
"Coding Tips (Classical)/Project Vault/About Obsidian/imgFiles/Pasted image 20231011091043.png",
|
||||||
"Coding Tips (Classical)/Project Vault/About Obsidian/Slides & Tools/export/Slides/plugin/chalkboard/_style.css",
|
"Coding Tips (Classical)/Project Vault/About Obsidian/Slides & Tools/export/Slides/plugin/chalkboard/_style.css",
|
||||||
"Coding Tips (Classical)/Project Vault/About Obsidian/Slides & Tools/export/Slides/plugin/chalkboard/img/blackboard.png",
|
"Coding Tips (Classical)/Project Vault/About Obsidian/Slides & Tools/export/Slides/plugin/chalkboard/img/blackboard.png",
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
Webscraping is a common task in the CS world that makes it easy and efficient to extract large amounts of data. It is part of a larger topic of data mining which allows for the human understandable analysis of all the data that is out there.
|
Webscraping is a common task in the CS world that makes it easy and efficient to extract large amounts of data. It is part of a larger topic of data mining which allows for the human understandable analysis of all the data that is out there.
|
||||||
|
|
||||||
You will often use requests and beautifulsoup libraries.
|
You will often use requests and beautifulsoup libraries. To prevent webscraping on your own sites, refer to the Robots.txt Files note.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
|
||||||
|
Robots.txt is an increasingly important file found on websites that determines whether you permit a web crawler to index your pages for search engine optimization. As webscraping is entirely legal in the US, this is the wild west of scraping, and thus I want to keep my brain and information safe from scraping.
|
||||||
|
|
||||||
|
|
||||||
|
*Resources*:
|
||||||
|
- [Robots.txt file examples](https://blog.hubspot.com/marketing/robots-txt-file)
|
Loading…
Reference in New Issue