[Rt-commit] r5860 - in Mnemonic: . lib lib/Mnemonic/Backend lib/Mnemonic/Stash

jesse at bestpractical.com jesse at bestpractical.com
Mon Sep 4 20:41:45 EDT 2006


Author: jesse
Date: Mon Sep  4 20:41:44 2006
New Revision: 5860

Added:
   Mnemonic/lib/Mnemonic/Stash/
   Mnemonic/lib/Mnemonic/Stash/Checksum.pm
Modified:
   Mnemonic/   (props changed)
   Mnemonic/bin/mnemonic
   Mnemonic/lib/Mnemonic.pm
   Mnemonic/lib/Mnemonic/Backend/S3.pm
   Mnemonic/lib/Mnemonic/Backend/Tmp.pm

Log:
 r27042 at pinglin:  jesse | 2006-09-04 20:41:35 -0400
 * Started to support a local cache of uploaded files and hashes.


Modified: Mnemonic/bin/mnemonic
==============================================================================
--- Mnemonic/bin/mnemonic	(original)
+++ Mnemonic/bin/mnemonic	Mon Sep  4 20:41:44 2006
@@ -34,8 +34,8 @@
             $b->remove_key_from_store($argv{'delete_key'});
     }
     if ($argv{backup}) {
-    my @items    = $b->list_backups();
-    my $manifest = $b->upload(
+        my @items    = $b->list_backups();
+    my $manifest = $b->backup(
         %argv,
         stored_keys => \@items
     );

Modified: Mnemonic/lib/Mnemonic.pm
==============================================================================
--- Mnemonic/lib/Mnemonic.pm	(original)
+++ Mnemonic/lib/Mnemonic.pm	Mon Sep  4 20:41:44 2006
@@ -10,12 +10,13 @@
 use File::Path                ();
 use UNIVERSAL::require        ();
 use Mnemonic::FileSet         ();
+use Mnemonic::Stash           ();
 use Mnemonic::Crypto::OpenPGP ();
 use Shell::Command            ();
 
 use base qw/Class::Accessor/;
 
-BEGIN { __PACKAGE__->mk_accessors(qw/backend config_file config pgp/); }
+BEGIN { __PACKAGE__->mk_accessors(qw/backend config_file config pgp stash/); }
 
 our $CHUNKSIZE = 1024000;
 
@@ -29,6 +30,11 @@
     $backend->require() || die $@;
     $self->backend( $backend->new( { config => $self->config() } ) );
     $self->backend->init();
+    warn "newing stash";
+    $self->stash( Mnemonic::Stash->new( { config => $self->config } ) );
+    warn "intting";
+    $self->stash->init();
+    warn "STash initted";
 }
 
 sub load_config {
@@ -54,7 +60,7 @@
     return ( $self->backend->get_keys(@_) );
 }
 
-sub upload {
+sub backup {
     my $self = shift;
     my %args = (
         stored_keys => undef,
@@ -63,29 +69,37 @@
         dry_run     => undef,
         @_
     );
-
-    my $manifest = {};
+    warn "Backing up";
+    my $manifest      = {};
     my $seen_checksum = {};
     $seen_checksum->{$_} = "prestored" for ( @{ $args{'stored_keys'} } );
-    my $on_match=       sub {
-                my $checksum = $self->store_item( manifest => $manifest, filename => $File::Find::name, stored_keys => $seen_checksum); 
-                $seen_checksum->{$checksum} = $File::Find::name;
-            }; 
+    my $on_match = sub {
+        my $checksum = $self->store_item(
+            manifest    => $manifest,
+            filename    => $File::Find::name,
+            stored_keys => $seen_checksum
+        );
+        $seen_checksum->{$checksum} = $File::Find::name;
+    };
     my $search = Mnemonic::FileSet->new(
-        { search_paths => $args{'path'}, skip_patterns => $args{'skip'} ,
-          on_match => $on_match
-    },
-    
+        {   search_paths  => $args{'path'},
+            skip_patterns => $args{'skip'},
+            on_match      => $on_match
+        },
+
     );
     my @manifest = $search->search();
 
-
     my $hostname = `hostname`;
     chomp $hostname;
-    my $manifest_id = $self->pgp->key_id .'/MANIFEST/' . $hostname . "/" . time() . '-' . $$;
+    my $manifest_id = $self->pgp->key_id
+        . '/MANIFEST/'
+        . $hostname . "/"
+        . time() . '-'
+        . $$;
     my $manifest_yaml = YAML::Syck::Dump($manifest);
-
-    $self->backend->store( $manifest_id => $self->pgp->encrypt($manifest_yaml) );
+    $self->backend->store(
+        $manifest_id => $self->pgp->encrypt($manifest_yaml) );
     warn "Completed backup $manifest_id\n";
     return $manifest_id;
 }
@@ -102,10 +116,9 @@
     my $manifest = $args{'manifest'};
 
     my @path = File::Spec->splitdir($filename);
-    pop @path; shift @path; # remove the filename and the leading slash
+    pop @path;
+    shift @path;    # remove the filename and the leading slash
     my $path = '';
-   
-
 
     while ( my $dir = shift @path ) {
         $path = File::Spec->catdir( $path, $dir );
@@ -120,7 +133,6 @@
 
     }
 
-
     if ( -d $filename ) {
         $manifest->{$filename} = {
             type     => 'directory',
@@ -131,47 +143,79 @@
     } elsif ( -f $filename ) {
         if ( -z $filename ) {
             $manifest->{$filename} = {
-                type     => 'file',
-                size     => '0',
-                statinfo => [ stat $filename ],
-                stored   => '0'
+                type        => 'file',
+                size        => '0',
+                statinfo    => [ stat $filename ],
+                tunesttored => '0'
                 }
 
         } else {
 
-
             eval {
 
-                    my @statinfo =  stat $filename;
+                my @statinfo = stat $filename;
+                if ( my $record = $self->stash->already_stored( $self->backend, $filename ,  \@statinfo) )
+                {
+                    warn "\t. " . $filename . " already stored (cache id " . $record->id . ")\n";
+                    $manifest->{$filename} = {
+                        subkeys           => $record->subkeys,
+                        statinfo          => $record->statinfo,
+                        stored            => 1,
+                        sha256            => $record->sha256,
+                        type              => 'file',
+                        using_cached_item => $record->id
 
-    my $len = $statinfo[7];
-    my $start_at = 0;
-    my @subkeys;
-    open(my $fh,  "<", $filename) || die $!;
-    while ( $start_at <= $len ) {
-        my $chunk_ref = $self->read_chunk( $fh, $CHUNKSIZE );
-        my $checksum = $self->_get_checksum($chunk_ref);
-        if ( $args{'stored_keys'}->{$checksum} ) {
-            warn "\t= $filename already stored\n";
-        } else {
-             $self->encrypt_and_store($checksum => $chunk_ref);
-            warn "\t+ $filename (from " . $start_at . ") uploaded\n";
-        }
-        push @subkeys, $checksum;
-        $start_at += $CHUNKSIZE;
-    }
-    close ($fh);
+                    };
+                } else { 
+                my $sha256 = $self->_sha256($filename);
+                if ( my $record = $self->stash->has_hash_of_file_content($self->backend, $sha256)) {
+                    warn "\t. " . $filename . "  matches already stored content (cache id " . $record->id . ")\n";
+                    $manifest->{$filename} = {
+                        subkeys           => $record->subkeys,
+                        statinfo          => [stat($filename)],
+                        stored            => 1,
+                        sha256            => $sha256,
+                        type              => 'file',
+                        using_cached_content => $record->id
 
+                    };
+                    $self->stash->store_file_info( %{ $manifest->{$filename} }, filename => $filename, backend  => $self->backend);
+                }
 
 
+                else {
 
-                $manifest->{$filename} = {
-                    keys      => \@subkeys,
-                    statinfo => \@statinfo,
-                    stored   => 1,
-                    sha256 => $self->_sha256($filename),
-                    type     => 'file'
-                };
+                    my $len      = $statinfo[7];
+                    my $start_at = 0;
+                    my @subkeys;
+                    open( my $fh, "<", $filename ) || die $!;
+                    while ( $start_at <= $len ) {
+                        my $chunk_ref = $self->read_chunk( $fh, $CHUNKSIZE );
+                        my $checksum = $self->_get_checksum($chunk_ref);
+                        if ( $args{'stored_keys'}->{$checksum} ) {
+                            warn "\t= $filename already stored\n";
+                        } else {
+                            $self->encrypt_and_store(
+                                $checksum => $chunk_ref );
+                            warn "\t+ $filename (from "
+                                . $start_at
+                                . ") stored\n";
+                        }
+                        push @subkeys, $checksum;
+                        $start_at += $CHUNKSIZE;
+                    }
+                    close($fh);
+
+                    $manifest->{$filename} = {
+                        subkeys  => \@subkeys,
+                        statinfo => \@statinfo,
+                        stored   => 1,
+                        sha256   => $sha256,
+                        type     => 'file'
+                    };
+                    $self->stash->store_file_info( %{ $manifest->{$filename} }, filename => $filename, backend  => $self->backend);
+                }
+            }
             };
             if ($@) {
                 $manifest->{'!errors'}->{$filename} = $@;
@@ -197,7 +241,8 @@
     my $result = $self->backend->fetch( $args{'manifest_id'} );
 
     #{ content_type, etag, value, @meta }
-    my $manifest = YAML::Syck::Load( $self->pgp->decrypt( $result->{'value'}));
+    my $manifest
+        = YAML::Syck::Load( $self->pgp->decrypt( $result->{'value'} ) );
 
     foreach my $file ( sort keys %$manifest ) {
         my @path = File::Spec->splitdir($file);
@@ -239,17 +284,17 @@
 
     if ( ( $manifest_entry->{stored} || 0 ) > 0 ) {
 
-            open( my $outfile, ">", $target ) || die $!;
-                        binmode($outfile);
-        foreach my $key (@{$manifest_entry->{keys}}) {
-
-        my $file_result = $self->backend->fetch( $key) ;
-        my $pt = $self->pgp->decrypt( $file_result->{'value'});
-        warn "\t\trestoring ".$key."\n";
-                                    print $outfile $pt || die $!;
-                                }
+        open( my $outfile, ">", $target ) || die $!;
+        binmode($outfile);
+        foreach my $key ( @{ $manifest_entry->{subkeys} } ) {
+
+            my $file_result = $self->backend->fetch($key);
+            my $pt          = $self->pgp->decrypt( $file_result->{'value'} );
+            warn "\t\trestoring " . $key . "\n";
+            print $outfile $pt || die $!;
+        }
 
-                                                close $outfile     || die $!;
+        close $outfile || die $!;
 
         unless ( $manifest_entry->{sha256} eq $self->_sha256($target) ) {
             warn "$file has an invalid SHA256 sum after decryption";
@@ -278,8 +323,9 @@
     my $self = shift;
     my %args = (@_);
 
-    my $result   = $self->backend->fetch( $args{'manifest_id'} );
-    my $manifest = YAML::Syck::Load( $result->{value} );
+    my $result = $self->backend->fetch( $args{'manifest_id'} );
+    my $manifest
+        = YAML::Syck::Load( $self->pgp->decrypt( $result->{value} ) );
     warn "This backup would restore:\n";
     foreach my $file ( sort keys %$manifest ) {
         print $file . "\n";
@@ -288,22 +334,22 @@
 }
 
 sub _sha256 {
-    my $self =shift;
+    my $self = shift;
     my $item = shift;
-    my $sha      = Digest::SHA->new('sha256');
+    my $sha  = Digest::SHA->new('sha256');
 
-    if (ref($item) eq 'SCALAR') {
+    if ( ref($item) eq 'SCALAR' ) {
         $sha->add($$item);
     } else {
         $sha->addfile($item);    # feed data into stream
     }
-    return  $sha->hexdigest;
+    return $sha->hexdigest;
 
 }
 
 sub _get_checksum {
     my $self = shift;
-    return $self->pgp->key_id ."/". $self->_sha256(@_);
+    return $self->pgp->key_id . "/" . $self->_sha256(@_);
 }
 
 sub update_statinfo {
@@ -323,43 +369,33 @@
 
 }
 
-
-
-
-
 =head2 read_chunk filename offset length
 
 returns a reference to the content it got.
 
 =cut
 
-
 sub read_chunk {
-    my $self = shift; 
-    my $fh =shift;
+    my $self   = shift;
+    my $fh     = shift;
     my $length = shift;
 
     my $content;
-    binmode($fh)||die $!;
-    read($fh, $content, $length)||die $!;
+    binmode($fh) || die $!;
+    read( $fh, $content, $length ) || die $!;
     return \$content;
 }
 
-
-
-                sub encrypt_and_store {
-                    my $self = shift;
-                    my $key = shift;
-                    my $plaintext_ref = shift;
-
-                    my $cyphertext = $self->pgp->encrypt($$plaintext_ref);
-                    eval {
-                        $self->backend->store( $key => $cyphertext, );
-                    };
-                    if ($@) {
-                        warn "\t!ERROR: $key: $@\n";
-                    }
-                }
-
+sub encrypt_and_store {
+    my $self          = shift;
+    my $key           = shift;
+    my $plaintext_ref = shift;
+
+    my $cyphertext = $self->pgp->encrypt($$plaintext_ref);
+    eval { $self->backend->store( $key => $cyphertext, ); };
+    if ($@) {
+        warn "\t!ERROR: $key: $@\n";
+    }
+}
 
 1;

Modified: Mnemonic/lib/Mnemonic/Backend/S3.pm
==============================================================================
--- Mnemonic/lib/Mnemonic/Backend/S3.pm	(original)
+++ Mnemonic/lib/Mnemonic/Backend/S3.pm	Mon Sep  4 20:41:44 2006
@@ -6,7 +6,7 @@
 
 use base 'Class::Accessor';
 
-BEGIN{__PACKAGE__->mk_accessors(qw/s3 bucket config/)};
+BEGIN{__PACKAGE__->mk_accessors(qw/s3 bucket config id/)};
 
 use vars qw/$OWNER_ID $OWNER_DISPLAYNAME/;
 
@@ -45,6 +45,8 @@
             or die $self->s3->err . ": " . $self->s3->errstr;
     }
 
+    $self->id($aws_access_key_id.":".$bucketname);
+
 }
 
 sub get_keys {

Modified: Mnemonic/lib/Mnemonic/Backend/Tmp.pm
==============================================================================
--- Mnemonic/lib/Mnemonic/Backend/Tmp.pm	(original)
+++ Mnemonic/lib/Mnemonic/Backend/Tmp.pm	Mon Sep  4 20:41:44 2006
@@ -9,24 +9,34 @@
 use base 'Class::Accessor';
 use File::Find::Rule;
 
-__PACKAGE__->mk_accessors(qw/path/);
+
+
+__PACKAGE__->mk_accessors(qw/path id/);
 
 sub init {
     my $self = shift;
+    my $host =`hostname`;
+    chomp $host;
+
+
     $self->path('/tmp/mnemonic');
     mkdir ($self->path);
+    $self->id($host.":" . $self->path);
+
 }
 
-sub get_keys {
-    my $self     = shift;
-    my %args = (prefix => '',
-               @_);
+ sub get_keys {
+     my $self     = shift;
+     my %args = (prefix => '',
+                @_);
          my @files = File::Find::Rule->file()
                                      ->in($self->path );
-        my $path     = $self->path;
-        my $matching = $args{prefix};
-    return (grep {  $_ =~ s/^$path\/// && $_ =~ /^$matching/ } @files);
-}
+         my $path     = $self->path;
+         my $matching = $args{prefix};
+   return (grep {  $_ =~ s/^$path\/// && $_ =~ /^$matching/ } @files);
+ }
+ 
+
 
 sub store {
     my $self = shift;
@@ -47,7 +57,8 @@
     my $self = shift;
     my $key = shift;
     my $tmp;
-    io($self->resolve_key($key)) > $tmp;
+    eval {io($self->resolve_key($key)) > $tmp;};
+    if ($@) { warn $@}
     return { value => $tmp, 
              etag => Digest::MD5::md5_hex($tmp) 
         };

Added: Mnemonic/lib/Mnemonic/Stash/Checksum.pm
==============================================================================
--- (empty file)
+++ Mnemonic/lib/Mnemonic/Stash/Checksum.pm	Mon Sep  4 20:41:44 2006
@@ -0,0 +1,22 @@
+package Mnemonic::Stash::Checksum;
+
+use warnings;
+use strict;
+
+use Jifty::DBI::Schema;
+use Jifty::DBI::Record schema {
+    column mtime    => type is 'integer';
+    column ctime    => type is 'integer';
+    column upload_time => type is 'integer';
+    column stored_to => type is 'text';
+    column filesize => type is 'integer';
+    column sha256 => type is 'text';
+    column path     => type is 'text';
+    column object_type     => type is 'text';
+    column subkeys => type is 'text', filters are 'Jifty::DBI::Filter::Storable';
+    column statinfo => type is 'text', filters are 'Jifty::DBI::Filter::Storable';
+};
+
+
+
+1;


More information about the Rt-commit mailing list