package Restriction;

#
# A class to find locations of restriction enzyme recognition sites in
# DNA sequence data.
#

use strict;
use warnings;
use Carp;
use Scalar::Util qw(blessed);

# Class data and methods
{
    # A list of all attributes with default values.
    # "enzyme" is given as an argument and may name several enzymes; each
    # enzyme name becomes a key of the _map hash.
    my %_attributes = (
        _rebase   => { },   # A Rebase.pm hash-based object
                            #   key   = restriction enzyme name
                            #   value = space-separated string of
                            #           recognition sites => regular expressions
        _sequence => '',    # DNA sequence data in raw format (only bases)
        _map      => { },   # a hash: keys are enzyme names,
                            #   values are arrays of locations
        _enzyme   => '',    # space- or comma-separated enzyme names,
                            #   set as keys of the _map hash
    );

    # Class-wide object counter.  Nothing in this file calls _decr_count
    # (the class has no DESTROY), so in practice it counts objects created.
    my $_count = 0;

    # Return a list of all attribute names (each with its leading underscore)
    sub _all_attributes {
        keys %_attributes;
    }

    # Manage the count of objects
    sub get_count {
        $_count;
    }
    sub _incr_count {
        ++$_count;
    }
    sub _decr_count {
        --$_count;
    }
}

# The "new" constructor method, called from the class, e.g.
#
#   my $restrict = Restriction->new(
#       rebase   => $rebase,            # required: a Rebase object
#       sequence => 'ACGAATTCCGGAATTCG',# required: raw DNA sequence
#       enzyme   => 'EcoRI, HindIII',   # optional: space/comma separated
#   );
#
# Arguments are named after the attributes without the leading underscore.
# Croaks when "rebase" is missing or is not a Rebase (or subclass) object,
# or when "sequence" is missing.  Every enzyme named in "enzyme" is mapped
# immediately through map_enzyme.  Returns the new object.
sub new {
    my ($class, %args) = @_;

    # Create a new object
    my $self = bless {}, $class;

    # Set the attributes for the provided arguments
    foreach my $attribute ($self->_all_attributes()) {

        # E.g. attribute = "_name", argument = "name"
        my ($argument) = ($attribute =~ /^_(.*)/);
        next unless exists $args{$argument};

        my $value = $args{$argument};
        if ($argument eq 'enzyme') {

            # An explicit undef means "no enzymes"; avoid warning in s///
            $value = '' unless defined $value;

            # permit space or comma separated enzyme names
            $value =~ s/,/ /g;
        }
        $self->{$attribute} = $value;
    }

    # Check that the correct arguments are given.  blessed/isa is used
    # rather than comparing ref() to a string so subclasses of Rebase pass.
    my $rebase = $self->{_rebase};
    if (not defined $rebase) {
        croak "A Rebase object must be given as an argument";
    }
    elsif (not(blessed($rebase) and $rebase->isa('Rebase'))) {
        croak "The argument to rebase is not a Rebase object";
    }
    elsif (not defined $self->{_sequence}) {
        croak "A sequence must be given as an argument";
    }

    # Optional attributes: give each object its own empty defaults so that
    # omitting "enzyme" neither warns nor leaves _map undefined.
    $self->{_enzyme} = ''  unless defined $self->{_enzyme};
    $self->{_map}    = { } unless defined $self->{_map};

    # Calculate the locations for each enzyme, store in _map hash attribute
    foreach my $enzyme (split(" ", $self->{_enzyme})) {
        $self->map_enzyme($enzyme);
    }

    $self->_incr_count;
    return $self;
}

# For this simple class I have no AUTOLOAD or DESTROY
# No get_rebase method, I don't want to pass around a huge hash
# No set mutators: all initialization done by way of "new" constructor
# No clone method.  Each sequence and set of enzymes can be easily calculated
# by means of a "new" command.
# Find every location of one enzyme's recognition sites in the sequence,
# store the result under that enzyme's name in the _map attribute, and
# return it.
#
#   $enzyme - restriction enzyme name, looked up through
#             get_regular_expressions
#
# Returns the 1-based positions, each listed once, in ascending order.
# Several sites of one enzyme may share a regular expression, and the
# expressions come back in hash order, so the raw matches are de-duplicated
# and sorted to give a stable result.
sub map_enzyme {
    my ($self, $enzyme) = @_;

    my %seen;
    foreach my $re ($self->get_regular_expressions($enzyme)) {
        $seen{$_} = 1 foreach $self->match_positions($re);
    }
    my @positions = sort { $a <=> $b } keys %seen;

    @{ $self->{_map}{$enzyme} } = @positions;
    return @positions;
}

# Return the regular expressions for an enzyme's recognition sites.
#
# The Rebase entry for an enzyme is read as a space-separated string of
# alternating "site regexp" tokens; only the regexps are returned.
# An enzyme with no entry yields an empty list and one explanatory warning
# (instead of an "uninitialized value in split" warning).
sub get_regular_expressions {
    my ($self, $enzyme) = @_;

    my $entry = $self->{_rebase}{_rebase}{$enzyme};
    if (not defined $entry) {
        carp "No recognition sites known for enzyme '"
            . (defined $enzyme ? $enzyme : '') . "'";
        $entry = '';
    }

    my (%sites) = split(' ', $entry);

    # May have duplicate values
    return values %sites;
}

# Find positions of a regular expression in the sequence
#
#   $regexp - pattern to search for (matched case-insensitively)
#
# Returns the 1-based start position of every match, including matches
# that overlap one another: the pattern is wrapped in a lookahead so the
# search resumes one base after each hit rather than after the whole site.
# An undefined or empty pattern returns an empty list.
sub match_positions {
    my ($self, $regexp) = @_;

    my @positions = ( );

    # An empty pattern has special meaning to the match operator and an
    # empty lookahead would match at every base; neither is a real site.
    return @positions unless defined $regexp and length $regexp;
    return @positions unless defined $self->{_sequence};

    # Determine positions of regular expression matches
    while ( $self->{_sequence} =~ /(?=$regexp)/ig ) {
        push @positions, ( $-[0] + 1 );
    }

    return @positions;
}

# Return the stored locations for one enzyme (empty list if the enzyme has
# not been mapped, rather than dereferencing an undefined value).
sub get_enzyme_map {
    my ($self, $enzyme) = @_;

    my $locations = ( $self->{_map} || { } )->{$enzyme} || [ ];
    return @{$locations};
}

# Return the names of all enzymes that have an entry in the map.
sub get_enzyme_names {
    my ($self) = @_;

    return keys %{ $self->{_map} || { } };
}

# Return the DNA sequence the object was built with.
sub get_sequence {
    my ($self) = @_;

    return $self->{_sequence};
}

# Return the whole map as a list of enzyme name => array-of-locations pairs
# (empty if nothing has been mapped).
sub get_map {
    my ($self) = @_;

    return %{ $self->{_map} || { } };
}

=head1 Restriction

Restriction: Given a Rebase object, sequence, and list of restriction enzyme
names, return the locations of the recognition sites in the sequence

=head1 Synopsis

    use Restriction;
    use Rebase;

    use strict;
    use warnings;

    my $rebase = Rebase->new(
        dbmfile    => 'BIONET',
        bionetfile => 'bionet.212'
    );

    my $restrict = Restriction->new(
        rebase   => $rebase,
        enzyme   => 'EcoRI, HindIII',
        sequence => 'ACGAATTCCGGAATTCG',
    );

    print "Locations for EcoRI are ",
        join(' ', $restrict->get_enzyme_map('EcoRI')), "\n";

=head1 AUTHOR

James Tisdall

=head1 COPYRIGHT

Copyright (c) 2003, James Tisdall

=cut

1;