From ec4c8f6cab8cf07270a0df4ff88dda29aacc5840 Mon Sep 17 00:00:00 2001 From: "Gustavo L de Mello (Guz)" Date: Tue, 17 Dec 2024 11:24:51 -0300 Subject: [PATCH] feat: add ai.txt and robots.txt to prevent AI crawlers --- app/app.go | 21 +++++++++++++++++++++ assets/ai.txt | 6 ++++++ assets/assets.go | 2 +- assets/robots.txt | 42 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 assets/ai.txt create mode 100644 assets/robots.txt diff --git a/app/app.go b/app/app.go index 2cfb1ec..0720ba1 100644 --- a/app/app.go +++ b/app/app.go @@ -11,6 +11,7 @@ import ( "forge.capytal.company/capytalcode/project-comicverse/lib/router" "forge.capytal.company/capytalcode/project-comicverse/lib/router/rerrors" "github.com/minio/minio-go/v7" + "keikos.work/assets" "keikos.work/configs" "keikos.work/handlers/pages" ) @@ -70,6 +71,26 @@ func NewWeb(opts Options) App { r.Handle("/", &pages.Home{}) r.Handle("/assets/", opts.Assets) + robots, err := assets.ASSETS.ReadFile("robots.txt") + if err != nil { + panic(err) + } + r.HandleFunc("/robots.txt", func(w http.ResponseWriter, r *http.Request) { + if _, err := w.Write(robots); err != nil { + rerrors.InternalError(err).ServeHTTP(w, r) + } + }) + + ai, err := assets.ASSETS.ReadFile("ai.txt") + if err != nil { + panic(err) + } + r.HandleFunc("/ai.txt", func(w http.ResponseWriter, r *http.Request) { + if _, err := w.Write(ai); err != nil { + rerrors.InternalError(err).ServeHTTP(w, r) + } + }) + // imgs := &pages.Images{S3: opts.S3} // r.HandleFunc("GET /images", imgs.List) // r.HandleFunc("GET /images/{name}", imgs.Get) diff --git a/assets/ai.txt b/assets/ai.txt new file mode 100644 index 0000000..9df92b0 --- /dev/null +++ b/assets/ai.txt @@ -0,0 +1,6 @@ +# Spawning AI +# Prevent datasets from using the following file types + +User-Agent: * +Disallow: / +Disallow: * diff --git a/assets/assets.go b/assets/assets.go index 8f7a143..66c08c8 100644 --- a/assets/assets.go +++ b/assets/assets.go @@ -4,5 +4,5 @@ import ( "embed" ) -//go:embed css fonts img +//go:embed css fonts img robots.txt ai.txt var ASSETS embed.FS diff --git a/assets/robots.txt b/assets/robots.txt new file mode 100644 index 0000000..c41ed6d --- /dev/null +++ b/assets/robots.txt @@ -0,0 +1,42 @@ +User-agent: AI2Bot +User-agent: Ai2Bot-Dolma +User-agent: Amazonbot +User-agent: anthropic-ai +User-agent: Applebot +User-agent: Applebot-Extended +User-agent: Bytespider +User-agent: CCBot +User-agent: ChatGPT-User +User-agent: Claude-Web +User-agent: ClaudeBot +User-agent: cohere-ai +User-agent: Diffbot +User-agent: DuckAssistBot +User-agent: FacebookBot +User-agent: FriendlyCrawler +User-agent: Google-Extended +User-agent: GoogleOther +User-agent: GoogleOther-Image +User-agent: GoogleOther-Video +User-agent: GPTBot +User-agent: iaskspider/2.0 +User-agent: ICC-Crawler +User-agent: ImagesiftBot +User-agent: img2dataset +User-agent: ISSCyberRiskCrawler +User-agent: Kangaroo Bot +User-agent: Meta-ExternalAgent +User-agent: Meta-ExternalFetcher +User-agent: OAI-SearchBot +User-agent: omgili +User-agent: omgilibot +User-agent: PanguBot +User-agent: PerplexityBot +User-agent: PetalBot +User-agent: Scrapy +User-agent: Sidetrade indexer bot +User-agent: Timpibot +User-agent: VelenPublicWebCrawler +User-agent: Webzio-Extended +User-agent: YouBot +Disallow: /