disallow AI bots in robots.txt, mail.rb title, valid rss

author: pdp8 <pdp8@pdp8.info> 2024-03-04 13:42:04 +0100
committer: pdp8 <pdp8@pdp8.info> 2024-03-04 13:42:04 +0100
commit: 6b40f3b54efabb740e825ba3d94b5695c2b98ede (patch)
tree: 2e2fae35cae72f73878cd5295e0f183891e284dc
parent: 4bd4cb578ee4579eaa81efec4b478654fae8e236 (diff)
8 files changed, 94 insertions, 20 deletions
diff --git a/Makefile b/Makefile
index 19477c0..5acb8ec 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ videos: $(video_mp4) $(video_webp)
 climbing: $(climbing_mp4) $(climbing_webp)
 
 %.mp3: %.flac
-	ffmpeg -i $< -ab 256k -map_metadata 0 -id3v2_version 3 $@
+	ffmpeg -i $< -y -vsync 0 -ab 256k -map_metadata 0 -id3v2_version 3 $@
 
 %.webp: %.webm
 	ffmpeg -i $< -hide_banner -loglevel error -vf thumbnail -frames:v 1 -c:v png -f image2pipe - | convert - $@
diff --git a/html.rb b/html.rb
index 171d487..5f45ddc 100755
--- a/html.rb
+++ b/html.rb
@@ -38,7 +38,7 @@ def print_html(basename, html)
 end
 
 def music_html
-  music = Dir[File.join(MEDIA_DIR, 'music', '*')].sort.reverse
+  music = Dir[File.join(MEDIA_DIR, 'music', '20*')].sort.reverse
   html = File.read(File.join(SNIPPETS, 'head.html'))
   html += nav 'music'
   html += '<div class="post"><a href="https://faircamp.webr.ing/prev/pdp8.info/music.html">← prev</a> |
@@ -46,7 +46,7 @@ def music_html
     <a href="https://faircamp.webr.ing/rand">random</a> |
     <a href="https://faircamp.webr.ing/next/pdp8.info/music.html">next →</a></div>'
   music.each do |dir|
-    next if dir.match 'alfadeo'
+    # next if dir.match 'alfadeo'
 
     date = File.basename(dir).split('_')[0]
     html += "<div class='post' id='#{date}'>"
@@ -185,5 +185,6 @@ puts `cp "#{File.join(WWW_DIR, last)}" "#{File.join(WWW_DIR, 'index.html')}"`
   '540px-PDP-8_.jpg',
   'style.css',
   'slideshow.js',
-  'robots.txt'
+  'robots.txt',
+  'sitemap.txt'
 ].each { |f| puts `rsync -av "#{File.join(SNIPPETS, f)}" "#{WWW_DIR}"` }
diff --git a/html/robots.txt b/html/robots.txt
index eb05362..1bd8b9e 100644
--- a/html/robots.txt
+++ b/html/robots.txt
@@ -1,2 +1,31 @@
-User-agent: *
-Disallow:
+User-agent: Amazonbot
+Disallow: /
+
+User-agent: ChatGPT-User
+Disallow: /
+
+User-agent: GPTBot
+Disallow: /
+
+User-agent: Google-Extended
+Disallow: /
+
+User-agent: anthropic-ai
+Disallow: /
+
+User-agent: Omgilibot
+Disallow: /
+
+User-agent: Omgili
+Disallow: /
+
+User-agent: FacebookBot
+Disallow: /
+
+User-agent: Bytespider
+Disallow: /
+
+User-agent: ImagesiftBot
+Disallow: /
+
+SITEMAP: https://pdp8.info/sitemap.txt
diff --git a/html/sitemap.txt b/html/sitemap.txt
new file mode 100644
index 0000000..1dce6f3
--- /dev/null
+++ b/html/sitemap.txt
@@ -0,0 +1,11 @@
+https://pdp8.info
+https://pdp8.info/about.html
+https://pdp8.info/climbing.html
+https://pdp8.info/code.html
+https://pdp8.info/contact.html
+https://pdp8.info/index.html
+https://pdp8.info/music.html
+https://pdp8.info/pictures.html
+https://pdp8.info/social/announce.html
+https://pdp8.info/social/create.html
+https://pdp8.info/videos.html
diff --git a/html/style.css b/html/style.css
index 1d5bfab..dbf2307 100644
--- a/html/style.css
+++ b/html/style.css
@@ -52,7 +52,7 @@ nav a:hover {
 }
 
 .post {
-  margin: 1em;
+  margin: 2em 1em;
 }
 
 img,
diff --git a/internetarchive.sh b/internetarchive.sh
index d987095..ff53348 100755
--- a/internetarchive.sh
+++ b/internetarchive.sh
@@ -8,4 +8,4 @@ title=$(echo $album| sed 's/_/ /g')
 url=https://pdp8.info/music.html#$date
 description="$(sed 's/^$/<br>/' README) <p> original release: <a href="$url">$url</a> "
 description=$(echo $description) # remove newlines
-ia upload pdp8_$album *flac cover.jpeg --metadata="title:$title" --metadata="mediatype:audio" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0/" --metadata="date:$date" --metadata="description:$description" --metadata="artist:pdp8@pdp8.info" --metadata="album:$album" --metadata="creator:pdp8@pdp8.info" --metadata="genre:electronic,techno"
\ No newline at end of file
+ia upload pdp8_$album *flac cover.webp --metadata="title:$title" --metadata="mediatype:audio" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0/" --metadata="date:$date" --metadata="description:$description" --metadata="artist:pdp8@pdp8.info" --metadata="album:$album" --metadata="creator:pdp8@pdp8.info" --metadata="genre:electronic,techno"
diff --git a/mail.rb b/mail.rb
index 35d495d..1aac68c 100755
--- a/mail.rb
+++ b/mail.rb
@@ -8,18 +8,28 @@ mailfile = if ARGV[1] and ARGV[1] == 'publish'
 
 Dir.chdir ARGV[0]
 date, title = File.basename(ARGV[0]).split('_', 2)
+nr_tracks = Dir['*.flac'].size
+abort "no flac files in #{Dir.pwd}" if nr_tracks.zero? # nothing to announce
+# release type used in the subject and body below, chosen by flac track count
+type = case nr_tracks
+       when 1 then 'single'
+       when 2..5 then 'ep'
+       else 'lp'
+       end
 content = ["From: info@pdp8.info
-Subject: [pdp8] #{title.gsub('_', ' ')}
+Subject: [pdp8] #{title.gsub('_', ' ')} #{type}
 Content-Type: text/plain
 List-Unsubscribe: <mailto: info@pdp8.info?subject=unsubscribe>"]
 content << ''
-content << File.read('README')
+content << "the new pdp8 #{type} \"#{title.gsub('_', ' ')}\" is online:"
+# content << File.read('README').chomp
 content << ''
-content << "https://pdp8.info/music.html\##{date}"
+content << "web: https://pdp8.info/music.html\##{date}"
+content << "internet archive: https://archive.org/details/pdp8_#{title}"
 bc = if File.exist?('bandcamp')
-       File.read('bandcamp').chomp
+       "bandcamp: #{File.read('bandcamp').chomp}"
      else
-       "https://pdp8.bandcamp.com/album/#{title.gsub(' ', '-')}"
+       "bandcamp: https://pdp8.bandcamp.com/album/#{title.gsub('_', '-')}"
      end
 content << bc
 content << ''
diff --git a/rss.rb b/rss.rb
index 554d45c..eb163d4 100755
--- a/rss.rb
+++ b/rss.rb
@@ -1,43 +1,66 @@
 #!/usr/bin/env ruby
 require 'date'
+require 'json'
+
 MEDIA = '/srv/media/'
+SOCIAL = '/srv/social/outbox/object/note/'
 WWW = '/srv/www/pdp8-test/'
+
 items = []
 %w[music videos].each do |cat|
   Dir[File.join(MEDIA, cat, '*')].each do |dir|
+    next unless File.basename(dir).match(/^\d/) # name must start with a digit; its first '_' field is parsed as a date
+
     date = File.basename(dir).split('_')[0]
-    updated = Date.parse(date) # .strftime('%Y-%m-%d')
+    updated = Date.parse(date)
     items << {
       title: File.basename(dir).split('_')[1..-1].join(' '),
       link: File.join('https://pdp8.info', cat + '.html#' + date),
       guid: File.join('https://pdp8.info', cat + '.html#' + date),
-      description: File.read(File.join(dir, 'README')).chomp.sub(/^\n/, '').sub("\n\n", "\n"),
+      description: '<![CDATA[' + File.read(File.join(dir, 'README')).chomp.sub(/^\n/, '').sub("\n\n", "\n").gsub(']]>', ']]]]><![CDATA[>') + ']]>', # split "]]>" so README text cannot close the CDATA section
       pubDate: updated.httpdate
-      # 'dc:date' => updated.rfc3339
     }
   end
 end
+
+# duplication of music/video posts?
+# Dir[File.join(SOCIAL, '*.json')].each do |json|
+#   note = JSON.parse(File.read(json))
+#   next unless note['attributedTo'] == 'https://social.pdp8.info/pdp8' and note['to'].include?('https://www.w3.org/ns/activitystreams#Public')
+#
+#   # TODO: add enclosures for attachments
+#   # tags
+#   items << {
+#     title: note['published'],
+#     link: 'https://pdp8.info/social/create.html#' + note['published'],
+#     guid: note['id'],
+#     description: '<![CDATA[' + note['content'] + ']]>',
+#     pubDate: Date.parse(note['published']).httpdate
+#   }
+# end
+
 date = Dir[File.join(MEDIA, 'pictures', '*.jpeg')].last.split('_')[0]
-updated = Date.parse(date) # .strftime('%Y%m%d')
+updated = Date.parse(date)
 items << {
   title: 'pictures',
   link: 'https://pdp8.info/pictures.html',
   guid: 'https://pdp8.info/pictures.html',
   pubDate: updated.httpdate
-  # 'dc:date' => updated.rfc3339
 }
 
 xml = ['<?xml version="1.0" encoding="UTF-8"?>
 <rss version="2.0"
   xmlns:content="http://purl.org/rss/1.0/modules/content/"
+  xmlns:atom="http://www.w3.org/2005/Atom">
   <channel>
     <title>pdp8</title>
     <link>https://pdp8.info</link>
     <description>music, pictures and videos</description>
-    <language>en</language>']
+    <language>en</language>
+    <atom:link href="https://pdp8.info/rss.xml" rel="self" type="application/rss+xml" /> ']
+
 date = DateTime.now
 xml << "    <pubDate>#{date.httpdate}</pubDate>"
-# xml << "    <dc:date>#{date.rfc3339}</dc:date>"
 
 items.sort_by { |i| i['pubDate'] }.each do |item|
   xml << '    <item>'
author	pdp8 <pdp8@pdp8.info>	2024-03-04 13:42:04 +0100
committer	pdp8 <pdp8@pdp8.info>	2024-03-04 13:42:04 +0100
commit	6b40f3b54efabb740e825ba3d94b5695c2b98ede (patch)
tree	2e2fae35cae72f73878cd5295e0f183891e284dc
parent	4bd4cb578ee4579eaa81efec4b478654fae8e236 (diff)