package WARC::Record;						# -*- CPerl -*-

use strict;
use warnings;

use Carp;
use Scalar::Util;

our @ISA = qw();

require WARC; *WARC::Record::VERSION = \$WARC::VERSION;
require WARC::Date;

=head1 NAME

WARC::Record - one record from a WARC file

=head1 SYNOPSIS

  use WARC;		# or ...
  use WARC::Volume;	# or ...
  use WARC::Collection;

  # WARC::Record objects are returned from ->record_at and ->search methods

  # Construct a record, as when preparing a WARC file
  $warcinfo = new WARC::Record (type => 'warcinfo');

  # Accessors

  $value = $record->field($name);

  $version = $record->protocol;	# analogous to HTTP::Message::protocol
  $volume = $record->volume;
  $offset = $record->offset;
  $record = $record->next;

  $fields = $record->fields;

...

=cut

use overload '<=>' => 'compareTo';
use overload fallback => 1;

# This implementation uses a hash as the underlying object.
#  Keys defined by this class:
#
#   fields
#	Embedded WARC::Fields object

# This method can be overridden in subclasses for read-only objects.
sub _set {
  my $self = shift;
  my $key = shift;
  my $value = shift;

  $self->{$key} = $value;
}

=head1 DESCRIPTION

C<WARC::Record> objects come in two flavors with a common interface.
Records read from WARC files are read-only and have meaningful return
values from the methods listed in "Methods on records from WARC files".
Records constructed in memory can be updated and those same methods all
return undef.

=head2 Common Methods

=over

=item $record-E<gt>fields

Get the internal C<WARC::Fields> object that contains WARC record headers.

=cut

sub fields { (shift)->{fields} }

=item $record-E<gt>field( $name )

Get the value of the WARC header named $name from the internal
C<WARC::Fields> object.

=cut

sub field {
  my $self = shift;
  return $self->fields->field(shift);
}

=item $record E<lt>=E<gt> $other_record

=item $record-E<gt>compareTo( $other_record )

Compare two C<WARC::Record> objects according to a simple total order:
ordering by starting offset for two records in the same file, and by
filename of the containing C<WARC::Volume> objects for records in different
files.  Constructed C<WARC::Record> objects are assumed to come from a
volume named "" (the empty string) for this purpose, and are ordered in an
arbitrary but stable manner amongst themselves.  Constructed
C<WARC::Record> objects never compare as equal.

Perl constructs a C<==> operator using this method, so WARC record objects
will compare as equal iff they refer to the same physical record.

=cut

sub compareTo {
  my $a = shift;
  my $b = shift;
  my $swap = shift;

  # sort in-memory-only records ahead of on-disk records
  return $swap ? 1 : -1 if defined $b->volume;

  # neither record is from a WARC volume
  my $cmp = (Scalar::Util::refaddr $a) <=> (Scalar::Util::refaddr $b);

  return $swap ? 0-$cmp : 0+$cmp;
}

=back

=head3 Convenience getters

=over

=item $record-E<gt>type

Alias for C<$record-E<gt>field('WARC-Type')>.

=cut

sub type { (shift)->field('WARC-Type') }

=item $record-E<gt>id

Alias for C<$record-E<gt>field('WARC-Record-ID')>.

=cut

sub id { (shift)->field('WARC-Record-ID') }

=item $record-E<gt>date

Return the C<'WARC-Date'> field as a C<WARC::Date> object.

=cut

sub date { WARC::Date->from_string((shift)->field('WARC-Date')) }

=back

=head2 Methods on records from WARC files

These methods all return undef if called on a C<WARC::Record> object that
does not represent a record in a WARC file.

=over

=item $record-E<gt>protocol

Return the format and version tag for this record.  For WARC 1.0, this
method returns 'WARC/1.0'.

=cut

sub protocol { return undef }

=item $record-E<gt>volume

Return the C<WARC::Volume> object representing the file in which this
record is located.

=cut

sub volume { return undef }

=item $record-E<gt>offset

Return the file offset at which this record can be found.

=cut

sub offset { return undef }

=item $record-E<gt>next

Return the next C<WARC::Record> in the WARC file that contains this record.
Returns an undefined value if called on the last record in a file.

=cut

sub next { return undef }

=item $record-E<gt>open_block

Return a tied filehandle that reads the WARC record block.

The WARC record block is the content of a WARC record, analogous to the
entity body in an C<HTTP::Message>.

=cut

sub open_block { return undef }

=item $record-E<gt>replay

=item $record-E<gt>replay( as =E<gt> $type )

Return a protocol-specific object representing the record contents.

This method returns undef if the library does not recognize the protocol
message stored in the record and croaks if a requested conversion is not
supported.

A record with Content-Type "application/http" with an appropriate "msgtype"
parameter produces an C<HTTP::Request> or C<HTTP::Response> object.  An
unknown "msgtype" on "application/http" produces a generic
C<HTTP::Message>.  The returned object may be a subclass to support
deferred loading of entity bodies.

A request to replay a record "as =E<gt> http" attempts to convert whatever
is stored in the record to an HTTP exchange, analogous to the "everything
is HTTP" interface that C<LWP> provides.

=cut

sub replay { return undef }

=item $record-E<gt>open_payload

Return a tied filehandle that reads the WARC record payload.

The WARC record payload is defined as the decoded content of the protocol
response or other resource stored in the record.  This method returns undef
if called on a WARC record that has no payload or content that we do not
recognize.

=cut

sub open_payload { return undef }

=back

=head2 Methods on fresh WARC records

=over

=item $record = new WARC::Record (I<key> =E<gt> I<value>, ...)

Construct a fresh WARC record, suitable for use with C<WARC::Builder>.

=cut

sub new {
  my $class = shift;
  my %opt = @_;

  foreach my $name (qw/type/)
    { croak "required field '$name' not specified" unless $opt{$name} }

  my $fields = new WARC::Fields ('WARC-Type' => $opt{type});

  bless { fields => $fields }, $class;
}

=back

=cut

1;
__END__

=head1 AUTHOR

Jacob Bachmeyer, E<lt>jcb@cpan.orgE<gt>

=head1 SEE ALSO

L<WARC>, L<HTTP::Message>

L<WARC::Builder/"Extension subfield 'sl' in gzip header">

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2019 by Jacob Bachmeyer

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut
