From 6b40f3b54efabb740e825ba3d94b5695c2b98ede Mon Sep 17 00:00:00 2001 From: pdp8 Date: Mon, 4 Mar 2024 13:42:04 +0100 Subject: disallow AI bots in robots.txt, mail.rb title, valid rss --- Makefile | 2 +- html.rb | 7 ++++--- html/robots.txt | 33 +++++++++++++++++++++++++++++++-- html/sitemap.txt | 11 +++++++++++ html/style.css | 2 +- internetarchive.sh | 2 +- mail.rb | 20 +++++++++++++++----- rss.rb | 37 ++++++++++++++++++++++++++++++------- 8 files changed, 94 insertions(+), 20 deletions(-) create mode 100644 html/sitemap.txt diff --git a/Makefile b/Makefile index 19477c0..5acb8ec 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ videos: $(video_mp4) $(video_webp) climbing: $(climbing_mp4) $(climbing_webp) %.mp3: %.flac - ffmpeg -i $< -ab 256k -map_metadata 0 -id3v2_version 3 $@ + ffmpeg -i $< -y -vsync 0 -ab 256k -map_metadata 0 -id3v2_version 3 $@ %.webp: %.webm ffmpeg -i $< -hide_banner -loglevel error -vf thumbnail -frames:v 1 -c:v png -f image2pipe - | convert - $@ diff --git a/html.rb b/html.rb index 171d487..5f45ddc 100755 --- a/html.rb +++ b/html.rb @@ -38,7 +38,7 @@ def print_html(basename, html) end def music_html - music = Dir[File.join(MEDIA_DIR, 'music', '*')].sort.reverse + music = Dir[File.join(MEDIA_DIR, 'music', '20*')].sort.reverse html = File.read(File.join(SNIPPETS, 'head.html')) html += nav 'music' html += '
← prev | @@ -46,7 +46,7 @@ def music_html random | next →
' music.each do |dir| - next if dir.match 'alfadeo' + # next if dir.match 'alfadeo' date = File.basename(dir).split('_')[0] html += "
" @@ -185,5 +185,6 @@ puts `cp "#{File.join(WWW_DIR, last)}" "#{File.join(WWW_DIR, 'index.html')}"` '540px-PDP-8_.jpg', 'style.css', 'slideshow.js', - 'robots.txt' + 'robots.txt', + 'sitemap.txt' ].each { |f| puts `rsync -av "#{File.join(SNIPPETS, f)}" "#{WWW_DIR}"` } diff --git a/html/robots.txt b/html/robots.txt index eb05362..1bd8b9e 100644 --- a/html/robots.txt +++ b/html/robots.txt @@ -1,2 +1,31 @@ -User-agent: * -Disallow: +User-agent: Amazonbot +Disallow: / + +User-agent: ChatGPT-User +Disallow: / + +User-agent: GPTBot +Disallow: / + +User-agent: Google-Extended +Disallow: / + +User-agent: anthropic-ai +Disallow: / + +User-agent: Omgilibot +Disallow: / + +User-agent: Omgili +Disallow: / + +User-agent: FacebookBot +Disallow: / + +User-agent: Bytespider +Disallow: / + +User-agent: ImagesiftBot +Disallow: / + +SITEMAP: https://pdp8.info/sitemap.txt diff --git a/html/sitemap.txt b/html/sitemap.txt new file mode 100644 index 0000000..1dce6f3 --- /dev/null +++ b/html/sitemap.txt @@ -0,0 +1,11 @@ +https://pdp8.info +https://pdp8.info/about.html +https://pdp8.info/climbing.html +https://pdp8.info/code.html +https://pdp8.info/contact.html +https://pdp8.info/index.html +https://pdp8.info/music.html +https://pdp8.info/pictures.html +https://pdp8.info/social/announce.html +https://pdp8.info/social/create.html +https://pdp8.info/videos.html diff --git a/html/style.css b/html/style.css index 1d5bfab..dbf2307 100644 --- a/html/style.css +++ b/html/style.css @@ -52,7 +52,7 @@ nav a:hover { } .post { - margin: 1em; + margin: 2em 1em; } img, diff --git a/internetarchive.sh b/internetarchive.sh index d987095..ff53348 100755 --- a/internetarchive.sh +++ b/internetarchive.sh @@ -8,4 +8,4 @@ title=$(echo $album| sed 's/_/ /g') url=https://pdp8.info/music.html#$date description="$(sed 's/^$/
/' README)

original release: $url " description=$(echo $description) # remove newlines -ia upload pdp8_$album *flac cover.jpeg --metadata="title:$title" --metadata="mediatype:audio" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0/" --metadata="date:$date" --metadata="description:$description" --metadata="artist:pdp8@pdp8.info" --metadata="album:$album" --metadata="creator:pdp8@pdp8.info" --metadata="genre:electronic,techno" \ No newline at end of file +ia upload pdp8_$album *flac cover.webp --metadata="title:$title" --metadata="mediatype:audio" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0/" --metadata="date:$date" --metadata="description:$description" --metadata="artist:pdp8@pdp8.info" --metadata="album:$album" --metadata="creator:pdp8@pdp8.info" --metadata="genre:electronic,techno" diff --git a/mail.rb b/mail.rb index 35d495d..1aac68c 100755 --- a/mail.rb +++ b/mail.rb @@ -8,18 +8,28 @@ mailfile = if ARGV[1] and ARGV[1] == 'publish' Dir.chdir ARGV[0] date, title = File.basename(ARGV[0]).split('_', 2) +nr_tracks = Dir['*.flac'].size +type = if nr_tracks == 1 + 'single' + elsif nr_tracks < 6 + 'ep' + elsif nr_tracks > 0 + 'lp' + end content = ["From: info@pdp8.info -Subject: [pdp8] #{title.gsub('_', ' ')} +Subject: [pdp8] #{title.gsub('_', ' ')} #{type} Content-Type: text/plain List-Unsubscribe: "] content << '' -content << File.read('README') +content << "the new pdp8 #{type} \"#{title.gsub('_', ' ')}\" is online:" +# content << File.read('README').chomp content << '' -content << "https://pdp8.info/music.html\##{date}" +content << "web: https://pdp8.info/music.html\##{date}" +content << "internet archive: https://archive.org/details/pdp8_#{title}" bc = if File.exist?('bandcamp') - File.read('bandcamp').chomp + "bandcamp: #{File.read('bandcamp').chomp}" else - "https://pdp8.bandcamp.com/album/#{title.gsub(' ', '-')}" + "bandcamp: https://pdp8.bandcamp.com/album/#{title.gsub('_', '-')}" end content << bc content << '' diff --git a/rss.rb b/rss.rb index 554d45c..eb163d4 100755 --- a/rss.rb +++ b/rss.rb @@ -1,43 +1,66 @@ #!/usr/bin/env ruby require 'date' +require 'json' + MEDIA = '/srv/media/' +SOCIAL = '/srv/social/outbox/object/note/' WWW = '/srv/www/pdp8-test/' + items = [] %w[music videos].each do |cat| Dir[File.join(MEDIA, cat, '*')].each do |dir| + next unless File.basename(dir).match(/^\d/) + date = File.basename(dir).split('_')[0] - updated = Date.parse(date) # .strftime('%Y-%m-%d') + updated = Date.parse(date) items << { title: File.basename(dir).split('_')[1..-1].join(' '), link: File.join('https://pdp8.info', cat + '.html#' + date), guid: File.join('https://pdp8.info', cat + '.html#' + date), - description: File.read(File.join(dir, 'README')).chomp.sub(/^\n/, '').sub("\n\n", "\n"), + description: '', pubDate: updated.httpdate - # 'dc:date' => updated.rfc3339 } end end + +# duplication of music/video posts? +# Dir[File.join(SOCIAL, '*.json')].each do |json| +# note = JSON.parse(File.read(json)) +# next unless note['attributedTo'] == 'https://social.pdp8.info/pdp8' and note['to'].include?('https://www.w3.org/ns/activitystreams#Public') +# +# # TODO: add enclosures for attachments +# # tags +# items << { +# title: note['published'], +# link: 'https://pdp8.info/social/create.html#' + note['published'], +# guid: note['id'], +# description: '', +# pubDate: Date.parse(note['published']).httpdate +# } +# end + date = Dir[File.join(MEDIA, 'pictures', '*.jpeg')].last.split('_')[0] -updated = Date.parse(date) # .strftime('%Y%m%d') +updated = Date.parse(date) items << { title: 'pictures', link: 'https://pdp8.info/pictures.html', guid: 'https://pdp8.info/pictures.html', pubDate: updated.httpdate - # 'dc:date' => updated.rfc3339 } xml = [' pdp8 https://pdp8.info music, pictures and videos - en'] + en + '] + date = DateTime.now xml << " #{date.httpdate}" -# xml << " #{date.rfc3339}" items.sort_by { |i| i['pubDate'] }.each do |item| xml << ' ' -- cgit v1.2.3