#!/usr/bin/perl -w # # $Id: mkgj,v 1.87 2023/11/19 15:19:43 kili Exp $ # # mkgj -- mirror karopapier game jsons use strict; use open ':encoding(UTF-8)'; use File::Path 'make_path'; use Getopt::Std; use HTTP::Response; use JSON; use LWP::UserAgent; use List::Util qw(max maxstr); use Sys::Syslog qw(openlog syslog LOG_USER LOG_ERR LOG_WARNING LOG_INFO); use Time::Local 'timelocal_modern'; sub usage { print STDERR "usage: mkgj [GID ...]\n"; print STDERR " mkgj -r\n"; exit(1); } my %opts; usage unless getopts('r', \%opts); my $f_r = $opts{'r'} || 0; # re-process local files, generating # players.json from scratch and possibly # (re) move files when game state and/or # iq state differ. Useful after changing # the players calculation or the # classification as iq game. usage if $f_r && @ARGV; # NOTE: response is capped at 100 entries, regardless of the limit! use constant KARO_GAMES_FMT => 'https://www.karopapier.de/api/games?sort=gid&offset=%d&limit=100%s'; use constant KARO_GAME_FMT => 'https://www.karopapier.de/api/games/%d?moves=1'; my (%fin_gids, %run_gids, %bro_gids, %players); my $changed = 0; my @broken = (); openlog('mkgj', 'pid', LOG_USER); $SIG{__DIE__} = sub { syslog(LOG_ERR, "Ouch: %s.", shift()) }; my $user = $< == 1000 ? "kili" : "karo"; chdir("/home/$user/karojson") or die "/home/$user/karojson: $!"; umask 0022; # This will automatically fail with Carp::croak() if a (fatal) error # occurs. make_path("all", "finished", "broken", "running", "iq-finished", "tmp"); # Make only this directory group writable, in case there's a temporar # error for a game, so other people in the same group can remove such # files. chmod(0775, "broken") or die "broken: $!"; my $ua = LWP::UserAgent->new( env_proxy => 1, ) or die "$!\n"; $ua->agent('mkgj/1.0'); # I used to use {en,de}code_json, but aristarch does a bad job parsing # JSON (relying on the order of keys). my $json = JSON->new->utf8->canonical; # Return 1 if arrays referenced by the two parameters are equal, 0 # otherwise. The arrays are assumed to contain only scalars. sub array_eq { my ($a, $b) = @_; my @a = @$a; my @b = @$b; while (@a && @b) { return 0 if (shift(@a) != shift(@b)); } return (!@a && !@b); } # Fetch a chunk of games infos starting at offset $1. $2 determines # wether finished or running game infos are fetched. # Returns a list of (up to 100 or whatever the maximum number of # game infos returned by the API is) gids. sub fetch_list_chunk { my ($offset, $finished) = @_; # For KARO_GAMES_FMT: with 'finished=0', we still get the # finished games. To get the running games, the parameter # 'finished' must be omitted completely. my $f = $finished ? '&finished=1' : ''; my $url = sprintf(KARO_GAMES_FMT, $offset, $f); my $resp = $ua->get($url) or die "$url: $!\n"; my $rc = $resp->code; my $rm = $resp->message; die "$url: $rc $rm\n" unless $rc == 200; my $jref = $json->decode($resp->content); # XXX: try to do this with map(). my @gids = (); foreach (@$jref) { my %gi = %$_; push @gids, $gi{id}; } return @gids; } # Fetch gids. $_[0] determines wether finished or running game infos # are fetched. # Returns a list of all finished or runnign gids. sub fetch_list { my $finished = shift; my @gids = (); my $offset = 0; while (my @chunk = fetch_list_chunk($offset, $finished)) { $offset += @chunk; push @gids, @chunk; } return @gids; } # Fetch gids of all running and finished games (in this order). sub fetch_all_list { syslog(LOG_INFO, "listing running games"); my @gids = fetch_list(0); # Since we can't fetch gids in an atomic way, and because we can # only specify offsets instead of starting gids, there is a race # condition that may cause some gids to be lost. For example, # if during fetching chunk N some game from this or any earler # chunk is finished, the offset used for fetching the next chunk # will be too large, and the next chunk will not contain gid of # the first running game after the last gid from chunk N. # To attack this problem, we repeast listing until we get # exactly the same lists in a row. syslog(LOG_INFO, "listing running games again"); for (my @gids2 = fetch_list(0); !array_eq(\@gids, \@gids2);) { syslog(LOG_WARNING, "list of running games differs"); syslog(LOG_INFO, "listing running games again"); @gids = @gids2; } syslog(LOG_INFO, "listing finished games"); push @gids, fetch_list(1); # Remove duplicate entries. my %seen; @gids = grep(!$seen{$_}++, @gids); return @gids; } # Fetch a specific game info and return the content. sub fetch_gid { my $gid = shift; my $url = sprintf(KARO_GAME_FMT, $gid); my $resp = $ua->get($url) or die "$url: $!\n"; my $rc = $resp->code; my $rm = $resp->message; # There is at least one gid (879) for which the server returns # an internal server error (funny enough, only with parameter # moves=1). In this case, we intentionally return undef to # indicate the fact. if ($rc == 500) { syslog(LOG_ERR, "gid $gid: internal server error"); return undef; } elsif ($rc == 404) { # Quick hack to indicate that we got a 404: return '{"404":404}'; } elsif ($rc != 200) { syslog(LOG_ERR, "$url: $rc $rm\n"); return undef; } return $resp->content; } # Update players.json. sub update_players { my $js = $json->encode(\%players) or die "players: %!"; open(my $fh, ">", "tmp/players.json") or die "tmp/players.json: $!"; print $fh $js; print $fh "\n"; close($fh); rename("tmp/players.json", "players.json") or die ("players.json: $!"); } # Load game data for the gid given in the first argument. # Returns 0 if we got a 404, 1 otherwise. sub load_game { my $gid = shift; my $game_json; if (!$f_r) { $game_json = fetch_gid($gid); if (not $game_json ) { syslog(LOG_INFO, "retrying after 5 seconds..."); sleep(5); $game_json = fetch_gid($gid); } if (not $game_json) { # Just store broken games in the corresponding # directory, as an empty file. # XXX What if we already have this game in # running or finished? open(my $fh, ">", "broken/$gid.json") or die "broken/$gid: $!"; push @broken, $gid; return 1; } } else { open(my $fh, "<", "all/$gid.json") or die "all/$gid.json: $!"; local $/ = undef; $game_json = <$fh>; close($fh); } my $gref = $json->decode($game_json); return 0 if exists($gref->{404}); # Already locally stored as finished, no more checks necessary. return 1 if (!$f_r && $gref->{finished} && exists($fin_gids{$gid})); # Use the maximum timestamp from all player's 'motion' data # and if no player has such data (which is the case right # after a games has been started), use the 'starteddate' field # of the game. # We could also check wether the game is finished and use the # finisheddate in this case, but we have to peek at all players, # anyway, to update their infos. my $this_playersr = $gref->{players}; my @this_players = map({ id => $_->{id}, name => $_->{name}, color => $_->{color}, lastmove => $_->{motion}->{t} }, @$this_playersr); my @ts = map($_->{lastmove}, grep(defined($_->{lastmove}), @this_players)); my $ts = @ts ? maxstr(@ts) : $gref->{starteddate}; my @td; if (!(@td = $ts =~ m/^(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)$/)) { syslog(LOG_ERR, "garbled time stamp $ts in gid $gid"); @td = (0, 1, 1, 0, 0, 0); }; my ($y, $m, $d, $h, $M, $s) = @td; if ($y < 0 || $m < 1 || $d < 1 || $h < 0 || $M < 0 || $s < 0) { syslog(LOG_ERR, "garbled time stamp $ts in gid $gid"); ($y, $m, $d, $h, $M, $s) = (0, 1, 1, 0, 0, 0); } my $t = timelocal_modern($s, $M, $h, $d, $m - 1, $y); if (!$f_r && !$gref->{finished} && exists($run_gids{$gid})) { # Game stored as running; no state change. # Check wether the upstream version is newer. my @sd = stat("running/$gid.json"); my $mtime = $sd[9]; return 1 if ($t == $mtime); } # Games tagged with "!KaroIQ" are KaroIQ games, # but only if there's more than one player. my $iq = grep(/^!KaroIQ!$/, @{$gref->{tags}}) && @this_players > 1; my ($total_inc, $running_inc); if (!$f_r && $gref->{finished} && exists($run_gids{$gid})) { # State change from running to finished. $total_inc = 0; $running_inc = -1; } elsif ($gref->{finished}) { # New finished game. $total_inc = 1; $running_inc = 0; } elsif (!$f_r && exists($run_gids{$gid})) { # Existing running game. $total_inc = $running_inc = 0; } else { # New running game. $total_inc = $running_inc = 1; } my $total_iq_inc = $iq ? $total_inc : 0; my $running_iq_inc = $iq ? $running_inc : 0; foreach (@this_players) { my $id = $_->{id}; my $lm = $_->{lastmove}; if (exists($players{$id})) { $players{$id}->{total} += $total_inc; $players{$id}->{total_iq} += $total_iq_inc; $players{$id}->{running} += $running_inc; $players{$id}->{running_iq} += $running_iq_inc; # Don't update name, color and lastmove from # games older than $lm. # No move in this game yet. XXX: if the player # already has a lastmove, that should be # compared against the last move within this # game, and name and colour should be updated. next if !$lm; my $plm = $players{$id}->{lastmove}; next if $plm && ($plm cmp $lm) >= 0; $players{$id}->{name} = $_->{name}; $players{$id}->{color} = $_->{color}; $players{$id}->{lastmove} = $lm; } else { $players{$id} = { name => $_->{name}, color => $_->{color}, lastmove => $lm, total => $total_inc, total_iq => $total_iq_inc, running => $running_inc, running_iq => $running_iq_inc }; } } # Some additional sanity checks and repair attempts in rebuild # mode. if ($f_r && !$gref->{finished} && !exists($run_gids{$gid})) { syslog(LOG_ERR, "gid $gid running but not stored"); link("all/$gid.json", "running/$gid.json") or die "running/$gid.json: $!"; } if ($f_r && !$gref->{finished} && exists($fin_gids{$gid})) { syslog(LOG_ERR, "gid $gid running but stored as finished"); unlink("finished/$gid.json") or die "finished/$gid.json: $!"; # TODO: also remove from iq/finished, which needs to use # a glob because of the different names. } if ($f_r && $gref->{finished} && !exists($fin_gids{$gid})) { syslog(LOG_ERR, "gid $gid finished but not stored"); link("all/$gid.json", "finished/$gid.json") or die "finished/$gid.json: $!"; } if ($f_r && $gref->{finished} && exists($run_gids{$gid})) { syslog(LOG_ERR, "gid $gid finished but stored as running"); unlink("running/$gid.json") or die "running/$gid.json: $!"; } # In rebuild mode, don't write anything now. # players.json will be written once after the main loop. return 1 if $f_r; # Something is new or has changed, so update the player infos # and store the game and adjust the timestamp. update_players; open(my $fh, ">", "tmp/f") or die "tmp/f: $!"; print $fh $game_json; print $fh "\n"; close($fh); utime($t, $t, "tmp/f") or die "tmp/f: $!\n"; if ($gref->{finished}) { # Finished game, either completely new, or previously # stored in as running. # In both cases, the game is stored in finished (and # maybe iq-finished) and all. # TODO: try to remove from broken (just in case)? link("tmp/f", "tmp/a") or die "tmp/a: $!\n"; if ($iq) { my $dname = sprintf( "%s/%04d-%02d-%02d_%02d:%02d:%02d_%d.json", "iq-finished", $y, $m, $d, $h, $M, $s, $gid); link("tmp/f", "tmp/i") or die "tmp/f: $!\n"; rename("tmp/i", $dname) or die "$dname $!\n"; } rename("tmp/f", "finished/$gid.json") or die "finished/$gid.json: $!\n"; rename("tmp/a", "all/$gid.json") or die "all/$gid.json: $!\n"; if (exists($run_gids{$gid})) { # Previously stored in running. unlink("running/$gid.json") or die "running/$gid.json: $!\n"; syslog(LOG_INFO, "gid $gid: finished" . ($iq ? ", KaroIQ" : "")); } else { # This must have been a quick game, because it # first appears as finished. syslog(LOG_INFO, "gid $gid: new, finished" . ($iq ? ", KaroIQ" : "")); } } elsif (!$gref->{finished} && exists($run_gids{$gid})) { # Game stored as running; no state change, but updated. link("tmp/f", "tmp/a") or die "tmp/a: $!\n"; rename("tmp/f", "running/$gid.json") or die "running/$gid.json: $!\n"; rename("tmp/a", "all/$gid.json") or die "all/$gid.json: $!\n"; syslog(LOG_INFO, "gid $gid: updated"); } else { # New running game. link("tmp/f", "tmp/a") or die "tmp/a: $!\n"; rename("tmp/f", "running/$gid.json") or die "running/$gid.json: $!\n"; rename("tmp/a", "all/$gid.json") or die "all/$gid.json: $!\n"; syslog(LOG_INFO, "gid $gid: new, running"); } $changed++; return 1; } # - Remark: Games are stored in different directories 'finished' and # 'running' depending on the game state; the state of games fetched # from karopapier.de is always determined by parsing the JSON, # because a game's state may change between listing and fetching. # All fetched games will also be linked into directory 'all'. # 0. Read in player infos. if (!$f_r) { syslog(LOG_INFO, "loading player infos"); if (open(my $fh, "<", "players.json")) { local $/; my $js = <$fh>; close($fh); my $pref = $json->decode($js) or die "$!\n"; %players = %$pref; } elsif (!$!{ENOENT}) { die "players.json: $!"; } } # 1. Lookup already stored finished games and remember them as keys # in %fin_gids. Same for running games in %run_gids and broken game # in $bro_gids. syslog(LOG_INFO, "looking up stored games"); my $max_gid = 0; chdir("finished"); my @fin_gids = grep { $_ =~ /^\d+.json$/ } <'*.json'>; s/\.json$// for @fin_gids; $max_gid = max(@fin_gids) if @fin_gids; %fin_gids = map { $_ => undef } @fin_gids; @fin_gids = undef; chdir("../running"); my @run_gids = grep { $_ =~ /^\d+.json$/ } <'*.json'>; s/\.json$// for @run_gids; $max_gid = max($max_gid, @run_gids) if @run_gids; %run_gids = map { $_ => undef } @run_gids; chdir("../broken"); my @bro_gids = grep { $_ =~ /^\d+.json$/ } <'*.json'>; s/\.json$// for @bro_gids; %bro_gids = map { $_ => undef } @bro_gids; @bro_gids = undef; chdir(".."); syslog(LOG_INFO, %fin_gids . " finished, " . %run_gids . " running, " . %bro_gids . " broken"); # 2. If we don't have any games yet (i.e. first run), # list (via API), fetch and store all running and finished games # (in this order, i.e. running first), starting with an offset # of the GID determined in step 1. As mentioned above, the storage # location is based on the JSON data, not on which list it # appeared in. Finished games we already have stored locally are # not fetched again. # If we allready have local games, don't list but just try to update # all locally stored running games and then try peek for new games # starting with $max_gid + 1 until we hit a 404. # As a special rule, one or more GIDs can be speciefied in the # command line, in which case only those GIDs are fetched and # processed. This may be used for games marked as broken by accident. my @gids; if (@ARGV) { @gids = sort {$a <=> $b} @ARGV; $max_gid = 0; } elsif ($f_r) { @gids = sort {$a <=> $b} keys(%fin_gids), @run_gids; $max_gid = 0; } elsif ($max_gid) { @gids = sort {$a <=> $b} @run_gids; } else { @gids = grep { ! exists $fin_gids{$_} && ! exists $bro_gids{$_}} fetch_all_list(); } if ($f_r) { syslog(LOG_INFO, "reading " . scalar(@gids) . " games"); } else { syslog(LOG_INFO, "fetching " . scalar(@gids) . " games"); } foreach (@gids) { load_game($_); } if ($max_gid) { syslog(LOG_INFO, "looking for games after $max_gid"); while (load_game(++$max_gid)) {}; } # To reduce IO, players.json is only updated once. in rebuild mode. update_players if $f_r; syslog(LOG_INFO, "$changed games touched"); if (@broken) { syslog(LOG_WARNING, scalar(@broken) . " new broken games"); print("New broken games: @broken\n"); exit(1); }