|
# Nulls the timestamp field in a Windows .lib archive,
# making the lib reproducible.
# The time is the TimeDateStamp in the COFF file header, four bytes at offset 4.
# See https://blog.conan.io/2019/09/02/Deterministic-builds-with-C-C++.html
# Also: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format
| 6 | + |
| 7 | +import sys |
| 8 | +import struct |
| 9 | + |
# Signature bytes that every archive (.lib) file must start with.
libheader = b"!<arch>\n"
| 11 | + |
def main():
    """
    Command-line entry point: strip the timestamps from a .lib archive.

    Usage: ``<script> <infile> [outfile]``.  When no output file is given
    the input file is rewritten in place.  Exits with a usage message
    (instead of an IndexError traceback) when the input file is missing
    from the command line.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s <infile.lib> [outfile.lib]" % sys.argv[0])

    infilename = sys.argv[1]
    outfilename = sys.argv[2] if len(sys.argv) > 2 else infilename

    # The archive is parsed completely and the input closed before the
    # output is opened, so writing back to the same path is safe.
    with open(infilename, "rb") as fp:
        lib = read_lib(fp, True)
    strip_lib_timestamp(lib)
    with open(outfilename, "wb") as fp:
        write_lib(fp, lib)
| 24 | + |
| 25 | + |
def read_lib(fp, verbose=False):
    """
    Parse a Microsoft .lib archive from the binary stream *fp*.

    The stream must support ``read`` and ``tell``.  When *verbose* is true
    every header and member that is parsed is printed.

    Returns a dict with these keys:
      "h1", "m1"   header and contents of the first linker member
      "h2", "m2"   header and contents of the second linker member
      "hl"         header of the longnames member, or None if absent
      "longnames"  list of the names stored in the longnames member
      "ho"         list of headers of the remaining members
      "o"          list of raw bytes of the remaining members

    Raises AssertionError when the signature or a linker member size does
    not match, and ValueError when the longnames member is truncated.
    """
    # lib file header
    signature = fp.read(len(libheader))
    assert signature == libheader, "missing archive signature"

    # read first and second linker members
    h1 = header_read(fp)
    p = fp.tell()
    if verbose:
        print("header", h1)
    m1 = first_lm_read(fp)
    assert fp.tell() - p == h1["size"], "first linker member size mismatch"
    if verbose:
        print("first linker member", m1)

    h2 = header_read(fp)
    if verbose:
        print("header", h2)
    p = fp.tell()
    m2 = second_lm_read(fp)
    assert fp.tell() - p == h2["size"], "second linker member size mismatch"
    if verbose:
        print("second linker member", m2)

    result = {
        "h1": h1,
        "m1": m1,
        "h2": h2,
        "m2": m2,
        "hl": None,
        "longnames": [],
        "ho": [],
        "o": [],
    }

    # now we might have an optional longnames member
    h = header_read(fp)
    if not h:
        return result

    if h["name"] == "//":
        result["hl"] = h
        end = fp.tell() + h["size"]
        while fp.tell() < end:
            before = fp.tell()
            result["longnames"].append(readcstr(fp))
            if fp.tell() == before:
                # Nothing was consumed, so the stream ended inside the
                # member; without this check the loop would never finish.
                raise ValueError("truncated longnames member")
        if verbose:
            # Print before the header is cleared, otherwise None is shown.
            print("header", h)
            print("longnames", result["longnames"])
        h = None

    # now read the remaining members; possibly we already read the first
    # header above (when there was no longnames member).
    member = h if h is not None else header_read(fp)
    while member is not None:
        data = fp.read(member["size"])
        result["ho"].append(member)
        result["o"].append(data)
        if verbose:
            print("header:", member)
            print("coff:", len(data))
        member = header_read(fp)

    return result
| 94 | + |
| 95 | + |
def write_lib(fp, lib):
    """
    Serialise the parsed archive *lib* to the binary stream *fp*.

    *lib* is a dict with the keys "h1", "m1", "h2", "m2", "hl",
    "longnames", "ho" and "o".  Members are written in this order: the
    archive signature, the two linker members, the optional longnames
    member, then every remaining member.
    """
    fp.write(libheader)

    # Both linker members are always present, each preceded by its header.
    header_write(fp, lib["h1"])
    first_lm_write(fp, lib["m1"])
    header_write(fp, lib["h2"])
    second_lm_write(fp, lib["m2"])

    # The longnames member is optional.
    longnames_header = lib["hl"]
    if longnames_header:
        header_write(fp, longnames_header)
        for name in lib["longnames"]:
            writecstr(fp, name)

    # Remaining members: header followed by the raw member bytes.
    for member_header, member_data in zip(lib["ho"], lib["o"]):
        header_write(fp, member_header)
        fp.write(member_data)
| 111 | + |
def strip_lib_timestamp(lib):
    """
    Remove the timestamps from a parsed archive, modifying *lib* in place.

    The "date" field of every member header (both linker members, the
    longnames member if present, and all remaining members) is set to
    "-1", and each entry of lib["o"] is replaced by the result of
    strip_coff_timestamp().
    """
    headers = [lib["h1"], lib["h2"]]
    if lib["hl"]:
        headers.append(lib["hl"])
    headers.extend(lib["ho"])

    for header in headers:
        header["date"] = "-1"

    lib["o"] = [strip_coff_timestamp(member) for member in lib["o"]]
| 122 | + |
| 123 | + |
def header_read(fp):
    """
    Read one member header from a Microsoft archive.

    A header is 60 bytes of space-padded ASCII fields: name (16), date
    (12), uid (6), gid (6), mode (8), size (10) and a 2-byte end marker.
    A single newline in front of the header is consumed if present.

    Returns a dict with the keys "optnl" (the consumed newline or None),
    "name", "date", "uid", "gid", "mode" (stripped strings) and "size"
    (int), or None when fewer than 16 bytes remain (end of file).

    Raises ValueError when the size field is not a decimal number and
    AssertionError when the end-of-header marker is wrong.
    """
    # header can start with optional newline
    optnl = read_optional_nl(fp)

    def read_field(width):
        """Read a fixed-width ASCII field and strip its padding."""
        return fp.read(width).decode("ascii").strip()

    name = fp.read(16)
    if len(name) < 16:
        return None  # eof
    name = name.decode("ascii").strip()
    date = read_field(12)
    uid = read_field(6)
    gid = read_field(6)
    mode = read_field(8)
    # The size is a decimal number.  Parse it with int() rather than
    # eval(): the text comes straight from the file, so eval() would run
    # arbitrary expressions and also rejects zero-padded numbers.
    size = int(read_field(10))
    eoh = fp.read(2)
    assert eoh == b"\x60\x0a", "bad end-of-header marker"
    return {
        "optnl": optnl,
        "name": name,
        "date": date,
        "uid": uid,
        "gid": gid,
        "mode": mode,
        "size": size,
    }
| 153 | + |
| 154 | + |
def header_write(fp, h):
    """
    Write the member header *h* to the binary stream *fp*.

    *h* is a dict with the keys "optnl", "name", "date", "uid", "gid",
    "mode" and "size".  Each field is written as ASCII, padded with
    spaces (or cut) to its fixed width, followed by the end-of-header
    marker.  A truthy "optnl" value is written in front of the header.
    """
    def put(text, width):
        """Write *text* space padded / truncated to exactly *width* bytes."""
        fp.write(text.encode("ascii").ljust(width)[:width])

    leading_newline = h["optnl"]
    if leading_newline:
        fp.write(leading_newline)

    for key, width in (
        ("name", 16),
        ("date", 12),
        ("uid", 6),
        ("gid", 6),
        ("mode", 8),
    ):
        put(h[key], width)
    put(str(h["size"]), 10)
    fp.write(b"\x60\x0a")
| 170 | + |
| 171 | + |
def first_lm_read(fp):
    """
    Read the contents of the first linker member from *fp*.

    Layout: a big-endian unsigned 32-bit symbol count, that many
    big-endian unsigned 32-bit offsets, then that many NUL-terminated
    symbol names.

    Returns {"offsets": [int, ...], "strings": [bytes, ...]}.
    Raises struct.error when the stream ends inside the numeric fields.
    """
    (nos,) = struct.unpack(">L", fp.read(4))  # unsigned long, big-endian

    # All offsets are read and decoded in a single call.
    offsets = list(struct.unpack(">%dL" % nos, fp.read(4 * nos)))
    strings = [readcstr(fp) for _ in range(nos)]

    # A newline that may follow the strings is not consumed here.
    return {"offsets": offsets, "strings": strings}
| 187 | + |
| 188 | + |
def first_lm_write(fp, lm):
    """
    Write the first linker member *lm* to *fp*.

    *lm* is a dict with "offsets" (ints) and "strings" (bytes).  The
    number of offsets and the offsets themselves are written as
    big-endian unsigned 32-bit values, followed by every string with a
    terminating NUL byte.
    """
    offsets = lm["offsets"]
    count = len(offsets)
    fp.write(struct.pack(">L", count))
    fp.write(struct.pack(">%dL" % count, *offsets))
    for symbol in lm["strings"]:
        writecstr(fp, symbol)
| 196 | + |
| 197 | + |
def second_lm_read(fp):
    """
    Read the contents of the second linker member from *fp*.

    Layout (all integers little-endian): an unsigned 32-bit member
    count, that many unsigned 32-bit offsets, an unsigned 32-bit symbol
    count, that many unsigned 16-bit indices, then that many
    NUL-terminated symbol names.

    Returns {"offsets": [...], "indices": [...], "strings": [...]}.
    """
    def read_u32():
        """Read one unsigned long, little-endian."""
        return struct.unpack("<L", fp.read(4))[0]

    # number of members, followed by one offset per member
    member_count = read_u32()
    offsets = [read_u32() for _ in range(member_count)]

    # number of symbols, followed by one index and one name per symbol
    symbol_count = read_u32()
    indices = [
        struct.unpack("<H", fp.read(2))[0]  # unsigned short
        for _ in range(symbol_count)
    ]
    strings = [readcstr(fp) for _ in range(symbol_count)]

    return {"offsets": offsets, "indices": indices, "strings": strings}
| 215 | + |
| 216 | + |
def second_lm_write(fp, lm):
    """
    Write the second linker member *lm* to *fp*.

    *lm* is a dict with "offsets" (ints), "indices" (ints) and "strings"
    (bytes).  The offset count and offsets are written as little-endian
    unsigned 32-bit values, then the index count (32-bit) and the
    indices (unsigned 16-bit), then every string with a terminating NUL.
    """
    offsets = lm["offsets"]
    fp.write(struct.pack("<L", len(offsets)))
    for member_offset in offsets:
        fp.write(struct.pack("<L", member_offset))

    indices = lm["indices"]
    fp.write(struct.pack("<L", len(indices)))
    for symbol_index in indices:
        fp.write(struct.pack("<H", symbol_index))

    for symbol in lm["strings"]:
        writecstr(fp, symbol)
| 228 | + |
| 229 | + |
def readcstr(f):
    """
    Read a NUL-terminated byte string from the binary stream *f*.

    Returns the bytes up to, but not including, the terminating NUL; the
    NUL itself is consumed.  If the stream ends before a NUL is found,
    the bytes read so far are returned.
    """
    buf = []
    while True:
        b = f.read(1)
        # read() signals end of file with an empty result (b""), not
        # None; stopping on any empty result keeps an unterminated
        # string from looping forever.
        if not b or b == b"\0":
            return b"".join(buf)
        buf.append(b)
| 238 | + |
| 239 | + |
def writecstr(f, s):
    """Write the byte string *s* to *f*, followed by a terminating NUL byte."""
    for piece in (s, b"\0"):
        f.write(piece)
| 243 | + |
def read_optional_nl(fp):
    """
    Consume one newline byte from *fp* if it is the next byte.

    Returns the newline byte when one was consumed.  Otherwise the stream
    position is restored and None is returned.
    """
    start = fp.tell()
    if fp.read(1) == b"\n":
        return b"\n"
    fp.seek(start)
    return None
| 251 | + |
def peek(fp):
    """
    Return the next byte of *fp* without advancing the stream.

    The result is empty when the stream is at its end.
    """
    start = fp.tell()
    upcoming = fp.read(1)
    fp.seek(start)
    return upcoming
| 258 | + |
def strip_coff_timestamp(coff, timestamp=0):
    """
    Return a copy of the member data *coff* with its TimeDateStamp replaced.

    *timestamp* is stored as a little-endian unsigned 32-bit value.

    In a regular COFF object the TimeDateStamp is at offset 4.  Import
    objects and anonymous object headers start with the signature
    Sig1=0x0000, Sig2=0xFFFF and keep Version and Machine at offset 4, so
    their TimeDateStamp is at offset 8 instead.

    Data too short to contain the field is returned unchanged rather than
    being lengthened.
    """
    ts = struct.pack("<L", timestamp)
    offset = 8 if coff[:4] == b"\x00\x00\xff\xff" else 4
    if len(coff) < offset + 4:
        return coff
    return coff[:offset] + ts + coff[offset + 4:]
| 262 | + |
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
| 265 | + |
| 266 | + |
0 commit comments