summaryrefslogtreecommitdiffstats
path: root/patches/ipkg-utils-050831/0003-ipkg-py-tarfile.patch
blob: cd5c7970249dc0e1a492205585cd708bf9b8edba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
From: the openembedded project <http://openembedded.net/>
Date: Fri, 22 Jan 2010 11:05:07 +0100
Subject: [PATCH] ipkg-py-tarfile

---
 arfile.py |  124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ipkg.py   |  121 +++++++++++++++++++++++++++++++++--------------------------
 setup.py  |    2 +-
 3 files changed, 192 insertions(+), 55 deletions(-)
 create mode 100644 arfile.py

diff --git a/arfile.py b/arfile.py
new file mode 100644
index 0000000..0bcd8d0
--- /dev/null
+++ b/arfile.py
@@ -0,0 +1,124 @@
+"""
+arfile - A module to parse GNU ar archives.
+
+Copyright (c) 2006-7 Paul Sokolovsky
+This file is released under the terms 
+of GNU General Public License v2 or later.
+"""
+import sys
+import os
+import tarfile 
+
+
+class FileSection:
+    "Expose bytes [offset, offset + size) of file object f as a separate file object."
+
+    def __init__(self, f, offset, size):
+        self.f = f
+        self.offset = offset
+        self.size = size
+        self.seek(0, 0)
+
+    def seek(self, offset, whence = 0):
+        # whence 0 and 2 are relative to the section bounds, 1 to the current position
+        if whence == 0:
+            return self.f.seek(offset + self.offset, whence)
+        elif whence == 1:
+            return self.f.seek(offset, whence)
+        elif whence == 2:
+            return self.f.seek(self.offset + self.size + offset, 0)
+        else:
+            raise ValueError("invalid whence: %r" % (whence,))
+
+    def tell(self):
+        return self.f.tell() - self.offset
+
+    def read(self, size = -1):
+        left = max(self.size - self.tell(), 0)  # bytes remaining before the section end
+        if size < 0 or size > left: size = left  # never read past the end of the section
+        return self.f.read(size)
+
+class ArFile:
+    "Read-only access to the members of an ar archive opened on file object f."
+
+    def __init__(self, f):
+        self.f = f
+        self.directory = {}  # member name -> header fields + [data offset]
+        self.directoryRead = False  # set once a scan has run into end of file
+
+        signature = self.f.readline()
+        if signature != "!<arch>\n":
+            raise IOError("not an ar archive (bad global header)")
+        self.directoryOffset = self.f.tell()
+
+    def open(self, fname):
+        "Return a FileSection over member fname; raise IOError if it is absent."
+        if fname in self.directory:
+            entry = self.directory[fname]
+            return FileSection(self.f, entry[-1], int(entry[5]))
+
+        f = None
+        if not self.directoryRead:
+            f = self._scan(fname)
+        if f is None:
+            raise IOError(2, "AR member not found: " + fname)
+        return f
+
+    def _scan(self, fname):
+        "Read headers from directoryOffset on; return fname's FileSection or None."
+        self.f.seek(self.directoryOffset, 0)
+
+        while True:
+            l = self.f.readline()
+            if l == "\n":
+                # skip the newline that pads odd-sized member data
+                l = self.f.readline()
+            if not l:
+                self.directoryRead = True
+                return None
+
+            descriptor = l.split()
+            size = int(descriptor[5])
+            memberName = descriptor[0].rstrip("/")  # the "/" terminator is optional
+            self.directory[memberName] = descriptor + [self.f.tell()]
+            if memberName == fname or (memberName.startswith("`") and memberName[1:] == fname):
+                # Record directory offset to start from next time
+                self.directoryOffset = self.f.tell() + size
+                return FileSection(self.f, self.f.tell(), size)
+
+            # Skip data and loop
+            self.f.seek(size, 1)
+
+
+if __name__ == "__main__":
+    # Self-test / usage: python arfile.py [DIRECTORY]
+    #
+    # Prints the control file of every .ipk package found in DIRECTORY
+    # (default: the current directory).
+    pkgDir = "."
+    if len(sys.argv) > 1:
+        pkgDir = sys.argv[1]
+
+    for pkgName in os.listdir(pkgDir):
+        if not pkgName.endswith(".ipk"): continue
+
+        print "=== %s ===" % pkgName
+        f = open(pkgDir + "/" + pkgName, "rb")
+        try:
+            ar = ArFile(f)
+            tarStream = ar.open("control.tar.gz")
+            # mode "r" lets tarfile detect the gzip compression by itself
+            tarf = tarfile.open("control.tar.gz", "r", tarStream)
+
+            # The member is stored as "control" or "./control" depending
+            # on how the tarball was built, so try both spellings.
+            try:
+                control = tarf.extractfile("control")
+            except KeyError:
+                control = tarf.extractfile("./control")
+
+            print control.read()
+        finally:
+            # Close each package before moving on to the next one,
+            # also when parsing it raised.
+            f.close()
diff --git a/ipkg.py b/ipkg.py
index 824ba4d..67c2aa3 100644
--- a/ipkg.py
+++ b/ipkg.py
@@ -41,6 +41,8 @@ import re
 import string
 import commands
 from stat import ST_SIZE
+import arfile
+import tarfile
 
 class Version:
     """A class for holding parsed package version information."""
@@ -131,78 +133,63 @@ class Package:
 	self.section = None
         self.filename_header = None
 	self.file_list = []
-        self.md5 = None
+        # md5 is lazy attribute, computed on demand
+        #self.md5 = None
         self.size = None
         self.installed_size = None
         self.filename = None
         self.isdeb = 0
+        self.fn = fn
 
 	if fn:
             # see if it is deb format
-            f = open(fn, "r")
+            f = open(fn, "rb")
             magic = f.read(4)
-            f.close()
+            f.seek(0, 0)
             if (magic == "!<ar"):
                 self.isdeb = 1
 
-            # compute the MD5.
-            f = open(fn, "r")
-            sum = md5.new()
-            while 1:
-                data = f.read(1024)
-                if not data: break
-                sum.update(data)
-            f.close()
-            if sys.version[:1] > '2':
-                # when using Python 2.0 or newer
-                self.md5 = sum.hexdigest() 
-            else:
-                self.md5 = string.join(map((lambda x:"%02x" % ord(x)),sum.digest()),'')
             stat = os.stat(fn)
-            self.size = stat[ST_SIZE]
+            self.size = stat[ST_SIZE]    
             self.filename = os.path.basename(fn)
 	    ## sys.stderr.write("  extracting control.tar.gz from %s\n"% (fn,)) 
-            if self.isdeb:
-                control = os.popen("ar p "+fn+" control.tar.gz | tar xfzO - './control'","r")
-            else:
-                control = os.popen("tar xfzO "+fn+" 'control.tar.gz' | tar xfzO - './control'","r")
-            line = control.readline()
-            while 1:
-                if not line: break
-                line = string.rstrip(line)
-                lineparts = re.match(r'([\w-]*?):\s*(.*)', line)
-		if lineparts:
-                    name = string.lower(lineparts.group(1))
-		    value = lineparts.group(2)
-		    while 1:
-			line = control.readline()
-			if not line: break
-			if line[0] != ' ': break
-                        line = string.rstrip(line)
-			value = value + '\n' + line
-                    # don't allow package to override its own filename
-                    if name == "filename":
-                        self.filename_header = value
-                    else:
-                        if self.__dict__.has_key(name):
-                            self.__dict__[name] = value
-                else:
-                    line = control.readline()
+	    if self.isdeb:
+        	ar = arfile.ArFile(f)
+        	tarStream = ar.open("control.tar.gz")
+        	tarf = tarfile.open("control.tar.gz", "r", tarStream)
+
+        	try:
+        	    control = tarf.extractfile("control")
+        	except KeyError:
+        	    control = tarf.extractfile("./control")
+	    else:
+		control = os.popen("tar --wildcards -xzO -f " + fn + " '*control.tar.gz' | tar xfzO - './control'", "r")
+
+            self.read_control(control)
             control.close()
-            if self.isdeb:
-                data = os.popen("ar p "+fn+" data.tar.gz | tar tfz -","r")
-            else:
-                data = os.popen("tar xfzO "+fn+" '*data.tar.gz' | tar tfz -","r")
-            while 1:
-                line = data.readline()
-                if not line: break
-                self.file_list.append(string.rstrip(line))
-            data.close()
 
 	self.scratch_dir = None
 	self.file_dir = None
 	self.meta_dir = None
 
+    def __getattr__(self, name):
+        "Compute md5 lazily on first access; any other missing attribute is an error."
+        if name != "md5":
+            raise AttributeError(name)
+        self._computeFileMD5()
+        return self.md5
+
+    def _computeFileMD5(self):
+        "Cache the hex MD5 of the package file in self.md5 (None without a file)."
+        if not self.fn:
+            self.md5 = None  # nothing to hash; same value md5 used to default to
+            return
+        f = open(self.fn, "rb")
+        digest = md5.new()
+        for data in iter(lambda: f.read(1024), ""): digest.update(data)
+        f.close()
+        self.md5 = digest.hexdigest()
+
     def read_control(self, control):
         import os
 
@@ -221,9 +208,15 @@ class Package:
                     value = value + '\n' + line
                 if name == 'size':
                     self.size = int(value)
+	        elif name == 'md5sum':
+                    self.md5 = value
                 elif self.__dict__.has_key(name):
                     self.__dict__[name] = value
-                if line[0] == '\n':
+		else:
+		    #print "Lost field %s, %s" % (name,value)
+                    pass
+
+                if line and line[0] == '\n':
                     return # consumes one blank line at end of package descriptoin
             else:
                 line = control.readline()
@@ -314,7 +307,27 @@ class Package:
 	return self.section
 
     def get_file_list(self):
-	return self.file_list
+        "Return the package's payload file names, each normalized to start with './'."
+        if not self.fn:
+            return []
+
+        if self.isdeb:
+            f = open(self.fn, "rb")
+            try:
+                tarStream = arfile.ArFile(f).open("data.tar.gz")
+                names = tarfile.open("data.tar.gz", "r", tarStream).getnames()
+            finally:
+                f.close()
+        else:
+            # NOTE(review): self.fn is pasted into a shell command unquoted
+            f = os.popen("tar --wildcards -xzO -f " + self.fn + " '*data.tar.gz' | tar tfz -","r")
+            # start from a fresh list so repeated calls do not pile up duplicates
+            names = [string.rstrip(line) for line in f.readlines()]
+            f.close()
+
+        # Make sure that filelist has consistent format regardless of tar version
+        self.file_list = map(lambda a: ["./", ""][a.startswith("./")] + a, names)
+        return self.file_list
 
     def write_package(self, dirname):
         buf = self.render_control()
diff --git a/setup.py b/setup.py
index 1c0c96c..8b28392 100644
--- a/setup.py
+++ b/setup.py
@@ -16,6 +16,6 @@ distutils.core.setup( name = 'ipkg-utils',
 		      platforms = 'POSIX',
 		      keywords = 'ipkg familiar',
                       url = 'http://www.handhelds.org/sources.html/',
-                      py_modules = [ 'ipkg' ],
+                      py_modules = [ 'ipkg', 'arfile' ],
 		      scripts = ['ipkg-compare-indexes', 'ipkg-make-index', 'ipkg-update-index', 'ipkg-build', 'ipkg-unbuild', 'ipkg-upload']
                       )
-- 
1.7.2.3