From 84a3e444989aa367f19b744892e948897a9d3e21 Mon Sep 17 00:00:00 2001 From: Philipp Kunz Date: Sun, 14 Apr 2024 18:13:19 +0200 Subject: [PATCH] update tsconfig --- npmextra.json | 11 +++- package.json | 8 ++- readme.hints.md | 1 + readme.md | 134 ++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 124 insertions(+), 30 deletions(-) create mode 100644 readme.hints.md diff --git a/npmextra.json b/npmextra.json index 0dfe7d7..4b8c7b9 100644 --- a/npmextra.json +++ b/npmextra.json @@ -8,14 +8,21 @@ "shortDescription": "a module for making sense of robots.txt", "npmPackagename": "@push.rocks/smartrobots", "license": "MIT", - "projectDomain": "push.rocks" + "projectDomain": "push.rocks", + "description": "A module for parsing and making sense of robots.txt files.", + "keywords": [ + "robots.txt", + "parsing", + "web scraping", + "SEO tools" + ] } }, "npmci": { "npmGlobalTools": [], "npmAccessLevel": "public" }, - "tsdocs": { + "tsdoc": { "legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n" } } \ No newline at end of file diff --git a/package.json b/package.json index 4bf9efa..29ec538 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "@push.rocks/smartrobots", "version": "1.0.2", "private": false, - "description": "a module for making sense of robots.txt", + "description": "A module for parsing and making sense of robots.txt files.", "main": "dist_ts/index.js", "typings": "dist_ts/index.d.ts", "author": "Lossless GmbH", @@ -37,5 +37,11 @@ "cli.js", "npmextra.json", "readme.md" + ], + "keywords": [ + "robots.txt", + "parsing", + "web scraping", + "SEO tools" ] } \ No newline at end of file diff --git a/readme.hints.md b/readme.hints.md new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/readme.hints.md @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/readme.md b/readme.md index 5d8ec72..de0f921 100644 --- a/readme.md +++ b/readme.md @@ -1,39 +1,119 @@ -# @pushrocks/smartrobots +# @push.rocks/smartrobots a module for making sense of robots.txt -## Availabililty and Links -* [npmjs.org (npm package)](https://www.npmjs.com/package/@pushrocks/smartrobots) -* [gitlab.com (source)](https://gitlab.com/pushrocks/smartrobots) -* [github.com (source mirror)](https://github.com/pushrocks/smartrobots) -* [docs (typedoc)](https://pushrocks.gitlab.io/smartrobots/) +## Install -## Status for master +To install `@push.rocks/smartrobots`, you need to have Node.js installed on your system. Once Node.js is installed, you can install `@push.rocks/smartrobots` by running the following command in your terminal: -Status Category | Status Badge --- | -- -GitLab Pipelines | [![pipeline status](https://gitlab.com/pushrocks/smartrobots/badges/master/pipeline.svg)](https://lossless.cloud) -GitLab Pipline Test Coverage | [![coverage report](https://gitlab.com/pushrocks/smartrobots/badges/master/coverage.svg)](https://lossless.cloud) -npm | [![npm downloads per month](https://badgen.net/npm/dy/@pushrocks/smartrobots)](https://lossless.cloud) -Snyk | [![Known Vulnerabilities](https://badgen.net/snyk/pushrocks/smartrobots)](https://lossless.cloud) -TypeScript Support | [![TypeScript](https://badgen.net/badge/TypeScript/>=%203.x/blue?icon=typescript)](https://lossless.cloud) -node Support | [![node](https://img.shields.io/badge/node->=%2010.x.x-blue.svg)](https://nodejs.org/dist/latest-v10.x/docs/api/) -Code Style | [![Code Style](https://badgen.net/badge/style/prettier/purple)](https://lossless.cloud) -PackagePhobia (total standalone install weight) | [![PackagePhobia](https://badgen.net/packagephobia/install/@pushrocks/smartrobots)](https://lossless.cloud) -PackagePhobia (package size on registry) | [![PackagePhobia](https://badgen.net/packagephobia/publish/@pushrocks/smartrobots)](https://lossless.cloud) -BundlePhobia (total size when bundled) | [![BundlePhobia](https://badgen.net/bundlephobia/minzip/@pushrocks/smartrobots)](https://lossless.cloud) -Platform support | [![Supports Windows 10](https://badgen.net/badge/supports%20Windows%2010/yes/green?icon=windows)](https://lossless.cloud) [![Supports Mac OS X](https://badgen.net/badge/supports%20Mac%20OS%20X/yes/green?icon=apple)](https://lossless.cloud) +```bash +npm install @push.rocks/smartrobots --save +``` + +This command will download and install `@push.rocks/smartrobots` and its dependencies into your project's `node_modules` directory. ## Usage -Use TypeScript for best in class intellisense +This tutorial will guide you through utilizing `@push.rocks/smartrobots`, a TypeScript module designed for interpreting `robots.txt` files of websites. By working through various scenarios, you'll learn how to efficiently parse and work with the data provided by `robots.txt`. -## Contribution +### Prerequisites -We are always happy for code contributions. If you are not the code contributing type that is ok. Still, maintaining Open Source repositories takes considerable time and thought. If you like the quality of what we do and our modules are useful to you we would appreciate a little monthly contribution: You can [contribute one time](https://lossless.link/contribute-onetime) or [contribute monthly](https://lossless.link/contribute). :) +Before diving into the examples, ensure you are familiar with basic TypeScript syntax and concepts. You also need a basic understanding of what `robots.txt` is and its significance in web development. -For further information read the linked docs at the top of this readme. +### Basic Setup -> MIT licensed | **©** [Lossless GmbH](https://lossless.gmbh) -| By using this npm module you agree to our [privacy policy](https://lossless.gmbH/privacy) +First, ensure you have imported `Smartrobots` from the `@push.rocks/smartrobots` package in your TypeScript file: -[![repo-footer](https://lossless.gitlab.io/publicrelations/repofooter.svg)](https://maintainedby.lossless.com) +```typescript +import { Smartrobots } from '@push.rocks/smartrobots'; +``` + +Instantiate the `Smartrobots` class to get started: + +```typescript +const mySmartrobots = new Smartrobots(); +``` + +### Parsing `robots.txt` from a URL + +One common use case is to parse the `robots.txt` file directly from a website. The `Smartrobots` class provides an easy-to-use method to achieve this, as demonstrated below: + +```typescript +async function parseRobotsFromUrl() { + const url = 'https://example.com/robots.txt'; // Replace with the URL to the desired robots.txt + try { + const parsedData = await mySmartrobots.parseRobotsTxtFromUrl(url); + console.log('Parsed robots.txt data:', parsedData); + } catch (error) { + console.error('Error parsing robots.txt from URL:', error); + } +} + +parseRobotsFromUrl(); +``` + +This function asynchronously fetches the `robots.txt` file from the specified URL and logs the parsed content to the console. + +### Parsing a `robots.txt` String + +If you already have the contents of a `robots.txt` file as a string, you can parse it directly using the `parseRobotsTxt` method. Here's how: + +```typescript +async function parseRobotsFromString(robotsTxtString: string) { + try { + const parsedData = await mySmartrobots.parseRobotsTxt(robotsTxtString); + console.log('Parsed robots.txt data:', parsedData); + } catch (error) { + console.error('Error parsing robots.txt string:', error); + } +} + +// Example robots.txt string +const robotsTxtString = ` +User-agent: * +Disallow: /secret-page +Sitemap: https://example.com/sitemap.xml +`; + +parseRobotsFromString(robotsTxtString); +``` + +This function takes a string representation of a `robots.txt` file, parses it, and logs the results. In the example string, there are directives for user-agents and a sitemap URL. + +### Understanding the Parsed Data + +The parsed data from `robots.txt` is returned as an object. In its current implementation, `@push.rocks/smartrobots` focuses on extracting sitemap URLs. Here's a sample output from parsing the example `robots.txt` string: + +```json +{ + "sitemaps": ["https://example.com/sitemap.xml"] +} +``` + +You can extend the parsing logic based on your requirements to handle more directives from `robots.txt`. + +### Conclusion + +`@push.rocks/smartrobots` provides a straightforward and efficient way to interpret `robots.txt` files in TypeScript projects. Whether you're fetching and parsing `robots.txt` from a URL or working with its contents as a string, this module simplifies the process, allowing you to focus on utilizing the data rather than parsing intricacies. + +Remember, `robots.txt` files are publicly accessible and should be used responsibly following web standards and etiquette. + +For more advanced use cases, consider contributing to or extending the functionality of `@push.rocks/smartrobots` to cover a broader range of directives and scenarios. + +## License and Legal Information + +This repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. + +**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file. + +### Trademarks + +This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH. + +### Company Information + +Task Venture Capital GmbH +Registered at District court Bremen HRB 35230 HB, Germany + +For any legal inquiries or if you require further information, please contact us via email at hello@task.vc. + +By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.