From: the openembedded project
Date: Fri, 22 Jan 2010 11:05:07 +0100
Subject: [PATCH] ipkg-py-tarfile
---
arfile.py | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ipkg.py | 121 +++++++++++++++++++++++++++++++++---------------------------
setup.py | 2 +-
3 files changed, 192 insertions(+), 55 deletions(-)
create mode 100644 arfile.py
diff --git a/arfile.py b/arfile.py
new file mode 100644
index 0000000..0bcd8d0
--- /dev/null
+++ b/arfile.py
@@ -0,0 +1,124 @@
+"""
+arfile - A module to parse GNU ar archives.
+
+Copyright (c) 2006-7 Paul Sokolovsky
+This file is released under the terms
+of GNU General Public License v2 or later.
+"""
+import sys
+import os
+import tarfile
+
+
+class FileSection:
+ "A class which allows to treat portion of file as separate file object."
+
+ def __init__(self, f, offset, size):
+ self.f = f
+ self.offset = offset
+ self.size = size
+ self.seek(0, 0)
+
+ def seek(self, offset, whence = 0):
+# print "seek(%x, %d)" % (offset, whence)
+ if whence == 0:
+ return self.f.seek(offset + self.offset, whence)
+ elif whence == 1:
+ return self.f.seek(offset, whence)
+ elif whence == 2:
+ return self.f.seek(self.offset + self.size + offset, 0)
+ else:
+ assert False
+
+ def tell(self):
+# print "tell()"
+ return self.f.tell() - self.offset
+
+ def read(self, size = -1):
+# print "read(%d)" % size
+ return self.f.read(size)
+
+class ArFile:
+
+ def __init__(self, f):
+ self.f = f
+ self.directory = {}
+ self.directoryRead = False
+
+ signature = self.f.readline()
+ assert signature == "!<arch>\n"
+ self.directoryOffset = self.f.tell()
+
+ def open(self, fname):
+ if self.directory.has_key(fname):
+ return FileSection(self.f, self.directory[fname][-1], int(self.directory[fname][5]))
+
+ if self.directoryRead:
+ raise IOError, (2, "AR member not found: " + fname)
+
+ f = self._scan(fname)
+ if f == None:
+ raise IOError, (2, "AR member not found: " + fname)
+ return f
+
+
+ def _scan(self, fname):
+ self.f.seek(self.directoryOffset, 0)
+
+ while True:
+ l = self.f.readline()
+ if not l:
+ self.directoryRead = True
+ return None
+
+ if l == "\n":
+ l = self.f.readline()
+ if not l: break
+ descriptor = l.split()
+# print descriptor
+ size = int(descriptor[5])
+ memberName = descriptor[0][:-1]
+ self.directory[memberName] = descriptor + [self.f.tell()]
+# print "read:", memberName
+ if memberName == fname or (memberName.startswith("`") and memberName[1:] == fname):
+ # Record directory offset to start from next time
+ self.directoryOffset = self.f.tell() + size
+ return FileSection(self.f, self.f.tell(), size)
+
+ # Skip data and loop
+ data = self.f.seek(size, 1)
+# print hex(f.tell())
+
+
+if __name__ == "__main__":
+ if None:
+ f = open(sys.argv[1], "rb")
+
+ ar = ArFile(f)
+ tarStream = ar.open("data.tar.gz")
+ print "--------"
+ tarStream = ar.open("data.tar.gz")
+ print "--------"
+ tarStream = ar.open("control.tar.gz")
+ print "--------"
+ tarStream = ar.open("control.tar.gz2")
+
+ sys.exit(0)
+
+
+ dir = "."
+ if len(sys.argv) > 1:
+ dir = sys.argv[1]
+ for f in os.listdir(dir):
+ if not f.endswith(".ipk"): continue
+
+ print "=== %s ===" % f
+ f = open(dir + "/" + f, "rb")
+
+ ar = ArFile(f)
+ tarStream = ar.open("control.tar.gz")
+ tarf = tarfile.open("control.tar.gz", "r", tarStream)
+ #tarf.list()
+
+ f2 = tarf.extractfile("control")
+ print f2.read()
diff --git a/ipkg.py b/ipkg.py
index 824ba4d..67c2aa3 100644
--- a/ipkg.py
+++ b/ipkg.py
@@ -41,6 +41,8 @@ import re
import string
import commands
from stat import ST_SIZE
+import arfile
+import tarfile
class Version:
"""A class for holding parsed package version information."""
@@ -131,78 +133,63 @@ class Package:
self.section = None
self.filename_header = None
self.file_list = []
- self.md5 = None
+ # md5 is a lazy attribute, computed on demand
+ #self.md5 = None
self.size = None
self.installed_size = None
self.filename = None
self.isdeb = 0
+ self.fn = fn
if fn:
# see if it is deb format
- f = open(fn, "r")
+ f = open(fn, "rb")
magic = f.read(4)
- f.close()
+ f.seek(0, 0)
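if (magic == "!<ar"):
self.isdeb = 1
- # compute the MD5.
- f = open(fn, "r")
- sum = md5.new()
- while 1:
- data = f.read(1024)
- if not data: break
- sum.update(data)
- f.close()
- if sys.version[:1] > '2':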
- # when using Python 2.0 or newer
- self.md5 = sum.hexdigest()
- else:
- self.md5 = string.join(map((lambda x:"%02x" % ord(x)),sum.digest()),'')
stat = os.stat(fn)
- self.size = stat[ST_SIZE]
+ self.size = stat[ST_SIZE]
self.filename = os.path.basename(fn)
## sys.stderr.write(" extracting control.tar.gz from %s\n"% (fn,))
- if self.isdeb:
- control = os.popen("ar p "+fn+" control.tar.gz | tar xfzO - './control'","r")
- else:
- control = os.popen("tar xfzO "+fn+" 'control.tar.gz' | tar xfzO - './control'","r")
- line = control.readline()
- while 1:
- if not line: break
- line = string.rstrip(line)
- lineparts = re.match(r'([\w-]*?):\s*(.*)', line)
- if lineparts:
- name = string.lower(lineparts.group(1))
- value = lineparts.group(2)
- while 1:
- line = control.readline()
- if not line: break
- if line[0] != ' ': break
- line = string.rstrip(line)
- value = value + '\n' + line
- # don't allow package to override its own filename
- if name == "filename":
- self.filename_header = value
- else:
- if self.__dict__.has_key(name):
- self.__dict__[name] = value
- else:
- line = control.readline()
+ if self.isdeb:
+ ar = arfile.ArFile(f)
+ tarStream = ar.open("control.tar.gz")
+ tarf = tarfile.open("control.tar.gz", "r", tarStream)
+
+ try:
+ control = tarf.extractfile("control")
+ except KeyError:
+ control = tarf.extractfile("./control")
+ else:
+ control = os.popen("tar --wildcards -xzO -f " + fn + " '*control.tar.gz' | tar xfzO - './control'", "r")
+
+ self.read_control(control)
control.close()
- if self.isdeb:
- data = os.popen("ar p "+fn+" data.tar.gz | tar tfz -","r")
- else:
- data = os.popen("tar xfzO "+fn+" '*data.tar.gz' | tar tfz -","r")
- while 1:
- line = data.readline()
- if not line: break
- self.file_list.append(string.rstrip(line))
- data.close()
self.scratch_dir = None
self.file_dir = None
self.meta_dir = None
+ def __getattr__(self, name):
+ if name == "md5":
+ self._computeFileMD5()
+ return self.md5
+ else:
+ raise AttributeError, name
+
+ def _computeFileMD5(self):
+ # compute the MD5.
+ f = open(self.fn, "rb")
+ sum = md5.new()
+ while 1:
+ data = f.read(1024)
+ if not data: break
+ sum.update(data)
+ f.close()
+ self.md5 = sum.hexdigest()
+
def read_control(self, control):
import os
@@ -221,9 +208,15 @@ class Package:
value = value + '\n' + line
if name == 'size':
self.size = int(value)
+ elif name == 'md5sum':
+ self.md5 = value
elif self.__dict__.has_key(name):
self.__dict__[name] = value
- if line[0] == '\n':
+ else:
+ #print "Lost field %s, %s" % (name,value)
+ pass
+
+ if line and line[0] == '\n':
return # consumes one blank line at end of package descriptoin
else:
line = control.readline()
@@ -314,7 +307,27 @@ class Package:
return self.section
def get_file_list(self):
- return self.file_list
+ if not self.fn:
+ return []
+
+ if self.isdeb:
+ f = open(self.fn, "rb")
+ ar = arfile.ArFile(f)
+ tarStream = ar.open("data.tar.gz")
+ tarf = tarfile.open("data.tar.gz", "r", tarStream)
+ self.file_list = tarf.getnames()
+ f.close()
+ else:
+ f = os.popen("tar xfzO " + self.fn + " '*data.tar.gz' | tar tfz -","r")
+ while 1:
+ line = f.readline()
+ if not line: break
+ self.file_list.append(string.rstrip(line))
+ f.close()
+
+ # Make sure that filelist has consistent format regardless of tar version
+ self.file_list = map(lambda a: ["./", ""][a.startswith("./")] + a, self.file_list)
+ return self.file_list
def write_package(self, dirname):
buf = self.render_control()
diff --git a/setup.py b/setup.py
index 1c0c96c..8b28392 100644
--- a/setup.py
+++ b/setup.py
@@ -16,6 +16,6 @@ distutils.core.setup( name = 'ipkg-utils',
platforms = 'POSIX',
keywords = 'ipkg familiar',
url = 'http://www.handhelds.org/sources.html/',
- py_modules = [ 'ipkg' ],
+ py_modules = [ 'ipkg', 'arfile' ],
scripts = ['ipkg-compare-indexes', 'ipkg-make-index', 'ipkg-update-index', 'ipkg-build', 'ipkg-unbuild', 'ipkg-upload']
)