Skip to content

Commit 1eb8da3

Browse files
committed
Add features to the windows lib stripper.
1 parent bc6dd50 commit 1eb8da3

File tree

1 file changed

+79
-28
lines changed

1 file changed

+79
-28
lines changed

tools/strip-lib.py

Lines changed: 79 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,18 @@
33
# the time is the TimeDateStamp in the COFF file header, four bytes at offset 4
44
# See https://blog.conan.io/2019/09/02/Deterministic-builds-with-C-C++.html
55
# also: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format
6+
#
7+
# There are some additional fixes added for reproducibility, such as fixing the zero-padding of names in the COFF
8+
# section headers.
69

710
import sys
811
import struct
912

13+
verbose = True
14+
1015
libheader = b"!<arch>\n"
1116

17+
1218
def main():
1319
infilename = sys.argv[1]
1420
if len(sys.argv) > 2:
@@ -17,13 +23,13 @@ def main():
1723
outfilename = infilename
1824

1925
with open(infilename, "rb") as fp:
20-
lib = read_lib(fp, True)
26+
lib = read_lib(fp)
2127
strip_lib_timestamp(lib)
2228
with open(outfilename, "wb") as fp:
2329
write_lib(fp, lib)
2430

2531

26-
def read_lib(fp, verbose=False):
32+
def read_lib(fp):
2733
"""
2834
read microsoft .lib file,
2935
"""
@@ -40,7 +46,7 @@ def read_lib(fp, verbose=False):
4046
assert fp.tell() - p == h1["size"]
4147
if verbose:
4248
print("first linker member", m1)
43-
49+
4450
h2 = header_read(fp)
4551
if verbose:
4652
print("header", h2)
@@ -66,15 +72,15 @@ def read_lib(fp, verbose=False):
6672
if not h:
6773
return result
6874

69-
if h['name'] == "//":
75+
if h["name"] == "//":
7076
result["hl"] = h
7177
p = fp.tell()
7278
while fp.tell() < p + h["size"]:
7379
result["longnames"].append(readcstr(fp))
74-
h = None
7580
if verbose:
76-
print('header', h)
77-
print('longnames', result['longnames'])
81+
print("header", h)
82+
print("longnames", result["longnames"])
83+
h = None
7884

7985
# now read the headers; we may already have read one above.
8086
while True:
@@ -84,10 +90,10 @@ def read_lib(fp, verbose=False):
8490
return result
8591

8692
result["ho"].append(h)
87-
result["o"].append(fp.read(h['size']))
93+
result["o"].append(fp.read(h["size"]))
8894
if verbose:
89-
print("header:", result['ho'][-1])
90-
print("coff:", len(result['o'][-1]))
95+
print("header:", result["ho"][-1])
96+
print("coff length:", len(result["o"][-1]))
9197
h = None
9298

9399
return result
@@ -109,24 +115,27 @@ def write_lib(fp, lib):
109115
header_write(fp, h)
110116
fp.write(c)
111117

118+
112119
def strip_lib_timestamp(lib):
    """
    Remove the build timestamps from a parsed library, in place.

    The "date" field of every archive member header (first and second
    linker member, the longnames member when present, and each object
    member) is overwritten with "-1".  Every object blob in lib["o"] then
    has its COFF timestamp stripped and, in a second pass, the null
    padding of its section names normalised.
    """
    for key in ("h1", "h2"):
        lib[key]["date"] = "-1"

    longnames_header = lib["hl"]
    if longnames_header:
        longnames_header["date"] = "-1"

    for member_header in lib["ho"]:
        member_header["date"] = "-1"

    # Two separate passes over the objects, so that all timestamp messages
    # are printed before any padding messages when verbose is on.
    stripped = []
    for blob in lib["o"]:
        stripped.append(strip_coff_timestamp(blob))
    lib["o"] = stripped

    padded = []
    for blob in lib["o"]:
        padded.append(fix_coff_null_padding(blob))
    lib["o"] = padded
122131

123132

124133
def header_read(fp):
125134
"""
126135
read a header entry from a microsoft archive
127136
"""
128137

129-
#header can start with optional newline
138+
# header can start with optional newline
130139
optnl = read_optional_nl(fp)
131140

132141
name = fp.read(16)
@@ -159,7 +168,7 @@ def writestr(s, n):
159168
fp.write(e[:n])
160169

161170
if h["optnl"]:
162-
fp.write(h['optnl'])
171+
fp.write(h["optnl"])
163172
writestr(h["name"], 16)
164173
writestr(h["date"], 12)
165174
writestr(h["uid"], 6)
@@ -193,7 +202,7 @@ def first_lm_write(fp, lm):
193202
fp.write(struct.pack(">L", o))
194203
for s in lm["strings"]:
195204
writecstr(fp, s)
196-
205+
197206

198207
def second_lm_read(fp):
199208
# number of members
@@ -210,7 +219,7 @@ def second_lm_read(fp):
210219
strings = []
211220
for i in range(n):
212221
strings.append(readcstr(fp))
213-
222+
214223
return {"offsets": offsets, "indices": indices, "strings": strings}
215224

216225

@@ -225,7 +234,7 @@ def second_lm_write(fp, lm):
225234
fp.write(struct.pack("<H", i))
226235
for s in lm["strings"]:
227236
writecstr(fp, s)
228-
237+
229238

230239
def readcstr(f):
231240
buf = []
@@ -235,32 +244,74 @@ def readcstr(f):
235244
return b"".join(buf)
236245
else:
237246
buf.append(b)
238-
247+
239248

240249
def writecstr(f, s):
    """Write the byte string s to f, followed by a terminating null byte."""
    for piece in (s, b"\0"):
        f.write(piece)
243252

253+
244254
def read_optional_nl(fp):
    """
    Consume one newline byte from fp if that is the next byte.

    Returns the newline byte when one was consumed.  Otherwise the stream
    is moved back to where it was and None is returned.
    """
    start = fp.tell()
    byte = fp.read(1)
    if byte != b"\n":
        # not a newline (or end of file): undo the read
        fp.seek(start)
        return None
    return byte
251261

262+
252263
def peek(fp):
    """
    Return the next byte of fp without consuming it.

    One byte is read and the stream is then moved back to its previous
    position; at end of file the result is an empty bytes object.
    """
    position = fp.tell()
    upcoming = fp.read(1)
    fp.seek(position)
    return upcoming
258269

270+
259271
def strip_coff_timestamp(coff, timestamp=0):
    """
    Return the archive member `coff` with its TimeDateStamp replaced.

    A regular COFF object stores the 4-byte little-endian TimeDateStamp at
    offset 4 of its file header.  Short import members and anonymous
    object headers start with Sig1 = 0x0000, Sig2 = 0xFFFF, followed by
    Version and Machine, so their timestamp lives at offset 8 instead;
    patching offset 4 there would clobber the Machine field.

    Members too short to contain a timestamp are returned unchanged.
    The input is returned as-is when the timestamp already matches.
    """
    # Sig1 (IMAGE_FILE_MACHINE_UNKNOWN) + Sig2 (0xFFFF) mark the short
    # import / anonymous object header layout.
    if coff[:4] == b"\x00\x00\xff\xff":
        offset = 8
    else:
        offset = 4
    end = offset + 4

    if len(coff) < end:
        # nothing that could be a timestamp; leave the data alone
        return coff

    old = struct.unpack_from("<L", coff, offset)[0]
    if timestamp != old:
        coff = coff[:offset] + struct.pack("<L", timestamp) + coff[end:]
        if verbose:
            print("replaced coff timestamp %r with %r" % (old, timestamp))
    return coff
262279

263-
if __name__ == "__main__":
264-
main()
280+
281+
def fix_coff_null_padding(coff):
    """
    Normalise the null padding of section names in a COFF object.

    Each 40-byte section header starts with an 8-byte, null-padded name.
    Some compilers leave garbage after the first null; this sets every
    byte after the first null to zero so that builds are reproducible.
    Names that fill all 8 bytes (no null) are left alone.

    The section table is located at 20 + SizeOfOptionalHeader, as given by
    the COFF file header.  Members that are not regular COFF objects
    (short import / anonymous object headers, recognised by Sig1 = 0x0000,
    Sig2 = 0xFFFF) and members too short to hold a file header are
    returned unchanged.  Section headers that would extend past the end of
    the data are ignored.

    Returns the input itself when nothing needed fixing, otherwise a new
    bytes object of the same length.
    """
    file_header_size = 20
    section_header_size = 40
    name_size = 8

    if len(coff) < file_header_size:
        return coff
    if coff[:4] == b"\x00\x00\xff\xff":
        # Import / anonymous object header: bytes 2..4 are a signature, not
        # a section count, and what follows is not a section table.
        return coff

    n_sections = struct.unpack_from("<H", coff, 2)[0]
    optional_header_size = struct.unpack_from("<H", coff, 16)[0]
    table_start = file_header_size + optional_header_size

    patched = bytearray(coff)
    modified = False
    for index in range(n_sections):
        start = table_start + index * section_header_size
        if start + section_header_size > len(coff):
            # truncated table: never touch or extend data past the end
            break
        name = coff[start : start + name_size]
        first_null = name.find(b"\0")
        if first_null < 0:
            continue
        # everything after the first null must be null
        shortname = name[:first_null]
        namenew = shortname.ljust(name_size, b"\0")
        if name != namenew:
            patched[start : start + name_size] = namenew
            modified = True
            if verbose:
                print(
                    "Fixed null padding of COFF section header name %r" % shortname
                )

    if modified:
        return bytes(patched)
    return coff
265314

266315

316+
if __name__ == "__main__":
317+
main()

0 commit comments

Comments
 (0)