From 827caee5a89dbeef9bf4f0e7289c04ad16455687 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Sun, 9 Jul 2023 21:17:01 -0700 Subject: [PATCH 1/5] Use genuine file descriptors on Unix --- Src/IronPython.Modules/nt.cs | 61 +++++++++++++++------ Src/IronPython/Runtime/PythonFileManager.cs | 3 + 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index f6f920a3c..fac3fc8b2 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -353,7 +353,18 @@ public static int dup(CodeContext/*!*/ context, int fd) { StreamBox streams = fileManager.GetStreams(fd); // OSError if fd not valid fileManager.EnsureRefStreams(streams); fileManager.AddRefStreams(streams); - return fileManager.Add(new(streams)); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + return fileManager.Add(new(streams)); + } else { + return fileManager.Add(UnixDup(fd), new(streams)); + } + + // Isolate Mono.Unix from the rest of the method so that we don't try to load the Mono.Unix assembly on Windows. + static int UnixDup(int fd) { + int res = Mono.Unix.Native.Syscall.dup(fd); + if (res < 0) throw GetLastUnixError(); + return res; + } } @@ -377,7 +388,18 @@ public static int dup2(CodeContext/*!*/ context, int fd, int fd2) { fileManager.EnsureRefStreams(streams); fileManager.AddRefStreams(streams); - return fileManager.Add(fd2, new(streams)); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + return fileManager.Add(fd2, new(streams)); + } else { + return fileManager.Add(UnixDup2(fd, fd2), new(streams)); + } + + // Isolate Mono.Unix from the rest of the method so that we don't try to load the Mono.Unix assembly on Windows. + static int UnixDup2(int fd, int fd2) { + int res = Mono.Unix.Native.Syscall.dup2(fd, fd2); + if (res < 0) throw GetLastUnixError(); + return res; + } } #if FEATURE_PROCESS @@ -842,7 +864,11 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, DefaultBufferSize, options); } - return context.LanguageContext.FileManager.Add(new(fs)); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + return context.LanguageContext.FileManager.Add(new(fs)); + } else { + return context.LanguageContext.FileManager.Add((int)fs.SafeFileHandle.DangerousGetHandle(), new(fs)); + } } catch (Exception e) { throw ToPythonException(e, path); } @@ -877,13 +903,22 @@ private static FileOptions FileOptionsFromFlags(int flag) { #if FEATURE_PIPES - private static Tuple CreatePipeStreams() { - if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { - return CreatePipeStreamsUnix(); - } else { + public static PythonTuple pipe(CodeContext context) { + var manager = context.LanguageContext.FileManager; + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { var inPipe = new AnonymousPipeServerStream(PipeDirection.In); var outPipe = new AnonymousPipeClientStream(PipeDirection.Out, inPipe.ClientSafePipeHandle); - return Tuple.Create(inPipe, outPipe); + return PythonTuple.MakeTuple( + manager.Add(new(inPipe)), + manager.Add(new(outPipe)) + ); + } else { + var pipeStreams = CreatePipeStreamsUnix(); + return PythonTuple.MakeTuple( + manager.Add((int)pipeStreams.Item1.SafeFileHandle.DangerousGetHandle(), new(pipeStreams.Item1)), + manager.Add((int)pipeStreams.Item2.SafeFileHandle.DangerousGetHandle(), new(pipeStreams.Item2)) + ); } static Tuple CreatePipeStreamsUnix() { @@ -891,16 +926,6 @@ static Tuple CreatePipeStreamsUnix() { return Tuple.Create(pipes.Reading, pipes.Writing); } } - - public static PythonTuple pipe(CodeContext context) { - var pipeStreams = CreatePipeStreams(); - var manager = context.LanguageContext.FileManager; - - return PythonTuple.MakeTuple( - manager.Add(new(pipeStreams.Item1)), - manager.Add(new(pipeStreams.Item2)) - ); - } #endif #if FEATURE_PROCESS diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index 5070b9ffa..5b1e854d8 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -10,6 +10,7 @@ using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Runtime.Versioning; using Microsoft.Scripting.Runtime; using Microsoft.Scripting.Utils; @@ -212,6 +213,7 @@ internal class PythonFileManager { private int _current = _offset; // lowest potentially unused key in _objects at or above _offset private readonly ConcurrentDictionary _refs = new(); + // Mandatory Add for Unix, on Windows only for dup2 case public int Add(int id, StreamBox streams) { ContractUtils.RequiresNotNull(streams, nameof(streams)); ContractUtils.Requires(streams.Id < 0, nameof(streams)); @@ -226,6 +228,7 @@ public int Add(int id, StreamBox streams) { } } + [SupportedOSPlatform("windows")] public int Add(StreamBox streams) { ContractUtils.RequiresNotNull(streams, nameof(streams)); ContractUtils.Requires(streams.Id < 0, nameof(streams)); From 20b06697b8669201b856ec46049f373a1a630cf3 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Fri, 6 Dec 2024 20:11:19 -0800 Subject: [PATCH 2/5] Complete implementing genuine file descriptors --- Src/IronPython.Modules/nt.cs | 90 ++++++++++++++------- Src/IronPython/Modules/_fileio.cs | 2 +- Src/IronPython/Runtime/PythonFileManager.cs | 43 +++++++--- Tests/modules/io_related/test_fd.py | 51 ++++++------ Tests/test_file.py | 6 +- 5 files changed, 127 insertions(+), 65 deletions(-) diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index fac3fc8b2..ad13a7adf 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -351,19 +351,24 @@ public static int dup(CodeContext/*!*/ context, int fd) { PythonFileManager fileManager = context.LanguageContext.FileManager; StreamBox streams = fileManager.GetStreams(fd); // OSError if fd not valid - fileManager.EnsureRefStreams(streams); - fileManager.AddRefStreams(streams); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + fileManager.EnsureRefStreams(streams); + fileManager.AddRefStreams(streams); return fileManager.Add(new(streams)); } else { - return fileManager.Add(UnixDup(fd), new(streams)); - } - - // Isolate Mono.Unix from the rest of the method so that we don't try to load the Mono.Unix assembly on Windows. - static int UnixDup(int fd) { - int res = Mono.Unix.Native.Syscall.dup(fd); - if (res < 0) throw GetLastUnixError(); - return res; + if (!streams.IsSingleStream && fd is 1 or 2) { + // If there is a separate write stream, dupping over stout or sderr uses write stream's file descriptor + fd = streams.WriteStream is FileStream fs ? fs.SafeFileHandle.DangerousGetHandle().ToInt32() : fd; + } + int fd2 = UnixDup(fd, -1, out Stream? dupstream); + if (dupstream is not null) { + return fileManager.Add(fd2, new(dupstream)); + } else { + // Share the same set of streams between the original and the dupped descriptor + fileManager.EnsureRefStreams(streams); + fileManager.AddRefStreams(streams); + return fileManager.Add(fd2, new(streams)); + } } } @@ -384,22 +389,45 @@ public static int dup2(CodeContext/*!*/ context, int fd, int fd2) { close(context, fd2); } - // TODO: race condition: `open` or `dup` on another thread may occupy fd2 + // TODO: race condition: `open` or `dup` on another thread may occupy fd2 (simulated desctiptors only) - fileManager.EnsureRefStreams(streams); - fileManager.AddRefStreams(streams); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + fileManager.EnsureRefStreams(streams); + fileManager.AddRefStreams(streams); return fileManager.Add(fd2, new(streams)); } else { - return fileManager.Add(UnixDup2(fd, fd2), new(streams)); + if (!streams.IsSingleStream && fd is 1 or 2) { + // If there is a separate write stream, dupping over stout or sderr uses write stream's file descriptor + fd = streams.WriteStream is FileStream fs ? fs.SafeFileHandle.DangerousGetHandle().ToInt32() : fd; + } + fd2 = UnixDup(fd, fd2, out Stream? dupstream); // closes fd2 atomically if reopened in the meantime + fileManager.Remove(fd2); + if (dupstream is not null) { + return fileManager.Add(fd2, new(dupstream)); + } else { + // Share the same set of streams between the original and the dupped descriptor + fileManager.EnsureRefStreams(streams); + fileManager.AddRefStreams(streams); + return fileManager.Add(fd2, new(streams)); + } } + } - // Isolate Mono.Unix from the rest of the method so that we don't try to load the Mono.Unix assembly on Windows. - static int UnixDup2(int fd, int fd2) { - int res = Mono.Unix.Native.Syscall.dup2(fd, fd2); - if (res < 0) throw GetLastUnixError(); - return res; + + private static int UnixDup(int fd, int fd2, out Stream? stream) { + int res = fd2 < 0 ? Mono.Unix.Native.Syscall.dup(fd) : Mono.Unix.Native.Syscall.dup2(fd, fd2); + if (res < 0) throw GetLastUnixError(); + if (ClrModule.IsMono) { + // This does not work on .NET, probably because .NET FileStream is not aware of Mono.Unix.UnxiStream + stream = new Mono.Unix.UnixStream(res, ownsHandle: true); + } else { + // This does not work 100% correctly on .NET, probably because each FileStream has its own read/write cursor + // (it should be shared between dupped descriptors) + //stream = new FileStream(new SafeFileHandle((IntPtr)res, ownsHandle: true), FileAccess.ReadWrite); + // Accidentaly, this would also not work on Mono: https://github.com/mono/mono/issues/12783 + stream = null; // Handle stream sharing in PythonFileManager } + return res; } #if FEATURE_PROCESS @@ -849,25 +877,27 @@ public static object open(CodeContext/*!*/ context, [NotNone] string path, int f FileMode fileMode = FileModeFromFlags(flags); FileAccess access = FileAccessFromFlags(flags); FileOptions options = FileOptionsFromFlags(flags); - Stream fs; + Stream s; + FileStream? fs; if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && IsNulFile(path)) { - fs = Stream.Null; + fs = null; + s = Stream.Null; } else if (access == FileAccess.Read && (fileMode == FileMode.CreateNew || fileMode == FileMode.Create || fileMode == FileMode.Append)) { // .NET doesn't allow Create/CreateNew w/ access == Read, so create the file, then close it, then // open it again w/ just read access. fs = new FileStream(path, fileMode, FileAccess.Write, FileShare.None); fs.Close(); - fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, DefaultBufferSize, options); } else if (access == FileAccess.ReadWrite && fileMode == FileMode.Append) { - fs = new FileStream(path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, DefaultBufferSize, options); } else { - fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, DefaultBufferSize, options); + s = fs = new FileStream(path, fileMode, access, FileShare.ReadWrite, DefaultBufferSize, options); } if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - return context.LanguageContext.FileManager.Add(new(fs)); + return context.LanguageContext.FileManager.Add(new(s)); } else { - return context.LanguageContext.FileManager.Add((int)fs.SafeFileHandle.DangerousGetHandle(), new(fs)); + return context.LanguageContext.FileManager.Add((int)fs!.SafeFileHandle.DangerousGetHandle(), new(s)); } } catch (Exception e) { throw ToPythonException(e, path); @@ -916,14 +946,14 @@ public static PythonTuple pipe(CodeContext context) { } else { var pipeStreams = CreatePipeStreamsUnix(); return PythonTuple.MakeTuple( - manager.Add((int)pipeStreams.Item1.SafeFileHandle.DangerousGetHandle(), new(pipeStreams.Item1)), - manager.Add((int)pipeStreams.Item2.SafeFileHandle.DangerousGetHandle(), new(pipeStreams.Item2)) + manager.Add(pipeStreams.Item1, new(pipeStreams.Item2)), + manager.Add(pipeStreams.Item3, new(pipeStreams.Item4)) ); } - static Tuple CreatePipeStreamsUnix() { + static Tuple CreatePipeStreamsUnix() { Mono.Unix.UnixPipes pipes = Mono.Unix.UnixPipes.CreatePipes(); - return Tuple.Create(pipes.Reading, pipes.Writing); + return Tuple.Create(pipes.Reading.Handle, pipes.Reading, pipes.Writing.Handle, pipes.Writing); } } #endif diff --git a/Src/IronPython/Modules/_fileio.cs b/Src/IronPython/Modules/_fileio.cs index 1069ea15b..93e2135bf 100644 --- a/Src/IronPython/Modules/_fileio.cs +++ b/Src/IronPython/Modules/_fileio.cs @@ -239,7 +239,7 @@ static Exception BadMode(string mode) { [Documentation("close() -> None. Close the file.\n\n" + "A closed file cannot be used for further I/O operations. close() may be" - + "called more than once without error. Changes the fileno to -1." + + "called more than once without error." )] public override void close(CodeContext/*!*/ context) { if (_closed) { diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index 5b1e854d8..8f8bcd0e3 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -10,6 +10,7 @@ using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Runtime.InteropServices; using System.Runtime.Versioning; using Microsoft.Scripting.Runtime; @@ -87,13 +88,16 @@ public bool IsConsoleStream() { // Isolate Mono.Unix from the rest of the method so that we don't try to load the Mono.Unix assembly on Windows. bool isattyUnix() { - // TODO: console streams may be dupped to differend FD numbers, do not use hard-coded 0, 1, 2 - return StreamType switch { - ConsoleStreamType.Input => Mono.Unix.Native.Syscall.isatty(0), - ConsoleStreamType.Output => Mono.Unix.Native.Syscall.isatty(1), - ConsoleStreamType.ErrorOutput => Mono.Unix.Native.Syscall.isatty(2), - _ => false - }; + if (Id >= 0) { + return Mono.Unix.Native.Syscall.isatty(Id); + } else { + return StreamType switch { + ConsoleStreamType.Input => Mono.Unix.Native.Syscall.isatty(0), + ConsoleStreamType.Output => Mono.Unix.Native.Syscall.isatty(1), + ConsoleStreamType.ErrorOutput => Mono.Unix.Native.Syscall.isatty(2), + _ => false + }; + } } } @@ -213,7 +217,8 @@ internal class PythonFileManager { private int _current = _offset; // lowest potentially unused key in _objects at or above _offset private readonly ConcurrentDictionary _refs = new(); - // Mandatory Add for Unix, on Windows only for dup2 case + // This version of Add is used with genuine file descriptors (Unix). + // Exception: support dup2 on all frameworks/platfroms. public int Add(int id, StreamBox streams) { ContractUtils.RequiresNotNull(streams, nameof(streams)); ContractUtils.Requires(streams.Id < 0, nameof(streams)); @@ -228,6 +233,8 @@ public int Add(int id, StreamBox streams) { } } + // This version of Add is used for emulated file descriptors. + // Must not be used on Unix. [SupportedOSPlatform("windows")] public int Add(StreamBox streams) { ContractUtils.RequiresNotNull(streams, nameof(streams)); @@ -280,7 +287,13 @@ public int GetOrAssignId(StreamBox streams) { lock (_synchObject) { int res = streams.Id; if (res == -1) { - res = Add(streams); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + res = Add(streams); + } else { + res = GetGenuineFileDescriptor(streams.ReadStream); + if (res < 0) throw new InvalidOperationException("stream not associated with a file descriptor"); + Add(res, streams); + } } return res; } @@ -313,5 +326,17 @@ public bool DerefStreamsAndCloseIfLast(StreamBox streams) { public bool ValidateFdRange(int fd) { return fd >= 0 && fd < LIMIT_OFILE; } + + [UnsupportedOSPlatform("windows")] + private static int GetGenuineFileDescriptor(Stream stream) { + return stream switch { + FileStream fs => checked((int)fs.SafeFileHandle.DangerousGetHandle()), +#if FEATURE_PIPES + System.IO.Pipes.PipeStream ps => checked((int)ps.SafePipeHandle.DangerousGetHandle()), +#endif + Mono.Unix.UnixStream us => us.Handle, + _ => -1 + }; + } } } diff --git a/Tests/modules/io_related/test_fd.py b/Tests/modules/io_related/test_fd.py index 608643d60..681cdcc46 100644 --- a/Tests/modules/io_related/test_fd.py +++ b/Tests/modules/io_related/test_fd.py @@ -9,7 +9,7 @@ import sys import unittest -from iptest import IronPythonTestCase, is_cli, run_test +from iptest import IronPythonTestCase, is_cli, is_posix, run_test from threading import Timer flags = os.O_CREAT | os.O_TRUNC | os.O_RDWR @@ -45,12 +45,11 @@ def test_dup2(self): # Assigning to self must be a no-op. self.assertEqual(os.dup2(fd, fd), fd if is_cli or sys.version_info >= (3,7) else None) - self.assertTrue(is_open(fd)) # The source must be valid. - self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor", os.dup2, -1, fd) + self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor" if is_cli or sys.version_info >= (3,5) else "[Errno 0] Error", os.dup2, -1, fd) self.assertTrue(is_open(fd)) - self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor", os.dup2, 99, fd) + self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor", os.dup2, fd + 10000, fd) self.assertTrue(is_open(fd)) # If the source is not open, then the destination is unaffected. @@ -67,16 +66,16 @@ def test_dup2(self): # Using dup2 can skip fds. self.assertEqual(os.dup2(fd, fd + 2), fd + 2 if is_cli or sys.version_info >= (3,7) else None) self.assertTrue(is_open(fd)) - self.assertTrue(not is_open(fd + 1)) + self.assertFalse(is_open(fd + 1)) self.assertTrue(is_open(fd + 2)) # Verify that dup2 closes the previous occupant of a fd. - self.assertEqual(os.open(os.devnull, os.O_RDWR, 0o600), fd + 1) - self.assertEqual(os.dup2(fd + 1, fd), fd if is_cli or sys.version_info >= (3,7) else None) + fdn = os.open(os.devnull, os.O_RDWR, 0o600) + self.assertEqual(os.dup2(fdn, fd), fd if is_cli or sys.version_info >= (3,7) else None) self.assertTrue(is_open_nul(fd)) - self.assertTrue(is_open_nul(fd + 1)) - os.close(fd + 1) + self.assertTrue(is_open_nul(fdn)) + os.close(fdn) self.assertTrue(is_open_nul(fd)) self.assertEqual(os.write(fd, b"1"), 1) @@ -87,7 +86,7 @@ def test_dup2(self): self.assertEqual(os.read(fd, 1), b"2") os.close(fd) - # fd+1 is already closed + # fdn is already closed os.close(fd + 2) os.unlink(test_filename) @@ -101,29 +100,33 @@ def test_dup(self): # The source must be valid. self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor", os.dup, -1) - self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor", os.dup, 99) + self.assertRaisesMessage(OSError, "[Errno 9] Bad file descriptor", os.dup, fd1 + 10000) # Test basic functionality. fd2 = os.dup(fd1) - self.assertTrue(fd2 == fd1 + 1) + if not (is_cli and is_posix): + # On IronPython/Posix, the first call to dup or dup2 may load Mono.Unix.dll and the corresponding `.so` + # This makes the fd numbers less predictable + self.assertEqual(fd2, fd1 + 1) self.assertTrue(is_open(fd2)) self.assertTrue(is_open(fd1)) os.close(fd1) - self.assertTrue(not is_open(fd1)) + self.assertFalse(is_open(fd1)) self.assertTrue(is_open(fd2)) - # dup uses the lowest-numbered unused descriptor for the new descriptor. fd3 = os.dup(fd2) - self.assertEqual(fd3, fd1) + # dup uses the lowest-numbered unused descriptor for the new descriptor. + if not (is_cli and is_posix): + self.assertEqual(fd3, fd1) - # writing though the duplicated fd writes to the same file - self.assertEqual(os.write(fd2, b"fd2"), 3) - self.assertEqual(os.write(fd3, b"fd3"), 3) - self.assertEqual(os.write(fd2, b"fd2-again"), 9) + # writing through the duplicated fd writes to the same file + self.assertEqual(os.write(fd2, b"(fd2)"), 5) + self.assertEqual(os.write(fd3, b"(=====fd3=====)"), 15) + self.assertEqual(os.write(fd2, b"(fd2-again)"), 11) os.close(fd3) self.assertEqual(os.lseek(fd2, 0, os.SEEK_SET), 0) - self.assertEqual(os.read(fd2, 15), b"fd2fd3fd2-again") + self.assertEqual(os.read(fd2, 5 + 15 + 11), b"(fd2)(=====fd3=====)(fd2-again)") # cleanup os.close(fd2) @@ -133,20 +136,20 @@ def test_dup_file(self): test_filename = "tmp_%d.dup-file.test" % os.getpid() file1 = open(test_filename, 'w+') - file1.write("file1") + file1.write("(file1)") file1.flush() fd2 = os.dup(file1.fileno()) file2 = open(fd2, 'w+') self.assertNotEqual(file1.fileno(), file2.fileno()) - file2.write("file2") + file2.write("(======file2======)") file2.flush() - file1.write("file1-again") + file1.write("(file1-again)") file1.close() file2.seek(0) - self.assertEqual(file2.read(), "file1file2file1-again") + self.assertEqual(file2.read(), "(file1)(======file2======)(file1-again)") file2.close() os.unlink(test_filename) diff --git a/Tests/test_file.py b/Tests/test_file.py index fea75c0dd..2604495e8 100644 --- a/Tests/test_file.py +++ b/Tests/test_file.py @@ -787,6 +787,10 @@ def test_open_abplus(self): with open(self.temp_file, "ab+") as f: f.write(b"abc") f.seek(0) - self.assertEqual(f.read(), b"abc") + self.assertEqual(f.read(2), b"ab") + f.write(b"def") + self.assertEqual(f.read(2), b"") + f.seek(0) + self.assertEqual(f.read(6), b"abcdef") run_test(__name__) From a2333c4116bf26c5c407f5f526eb0f3d3cc89f69 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Fri, 6 Dec 2024 20:37:13 -0800 Subject: [PATCH 3/5] Add developer's deocumentation --- Documentation/file-descriptors.md | 144 ++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100755 Documentation/file-descriptors.md diff --git a/Documentation/file-descriptors.md b/Documentation/file-descriptors.md new file mode 100755 index 000000000..56d7a1d0a --- /dev/null +++ b/Documentation/file-descriptors.md @@ -0,0 +1,144 @@ +# File Descriptors in IronPython + +## Windows + +The conceptual picture of file descriptors (FDs) usage on Windows, for the most interesting case of `FileStream`: + +```mermaid +graph LR; + +FileIO --> StreamBox --> FileStream --> Handle(Handle) --> OSFile[OS File]; +FD(FD) <--> StreamBox; +``` + +Conceptually, the relationship between `FD` (a number) and `StreamBox` (a class) is bidirectional because `PythonFileManager` (global singleton) maintains the association between the two so it is cost-free to obtaining the one having the other. FD is not the same as the handle, which is created by the OS. FD is an emulated (fake) file descriptor, assigned by the `PythonFileManager`, for the purpose of supporting the Python API. The descriptors are allocated lazily, i.e. only if the user code makes an API call that accesses it. Once assigned, the descriptor does not change. The FD number is released once the FD is closed (or the associated `FileIO` is closed and had `closefd` set to true.) + +It is possible to have the structure above without `FileIO`; for instance when an OS file is opened with one of the low-level functions in `os`, or when an existing FD is duplicated. It is also possible to associate an FD with several `FileIO`. In such cases it is the responsibility of the user code to take care that the FD is closed at the right time. + +When FD is duplicated (using `dup` or `dup2`), the associated `StreamBox` is duplicated (there is always a 1-to1 relationship between FD and `StreamBox`), but the underlying `FileStream` objects remain the same, and so are the underlying OS handles. The new FD may be used to create a `FileIO` (or several, just as the original FD). All read/seek/write operations on both descriptors go though the same `FileStream` object and the same OS handle. + +```mermaid +graph LR; + +FD1(FD1) <--> StreamBox --> FileStream --> Handle(Handle) --> OSFile[OS File]; +FD2(FD2) <--> StreamBox2[StreamBox] --> FileStream; +``` + +The descriptors can be closed independently, and the underlying `FileStream` is closed when the last `StreamBox` using it is closed. + +## Posix + +On Unix-like systems (Linux, maxOS), `FileStream` uses the actual file descriptor as the handle. In the past. IronPython was ignoring this and still issuing its own fake file descriptors as it is in the case of Windows. Now, however, the genuine FD is extracted from the handle and used as FD at the `PythonFileManager` level, ensuring that clients of Python API obtain the genuine FD. + +```mermaid +graph LR; + +FileIO --> StreamBox --> FileStream --> FDH(FD) --> OSFile[OS File]; +FD(FD) <--> StreamBox; +``` + +When descriptor FD is duplicated, the actual OS call is made to create the duplicate FD2. In order to use FD2 directly, a new `Stream` object has to be created around it. + +### Optimal Mechanism + +The optimal solution is to create another `FileStream` using the constructor that accepts an already opened file descriptor. + +```mermaid +graph LR; + +FD1(FD1) <--> StreamBox --> FileStream --> FDH1(FD1) --> OSFile[OS File]; +FD2(FD2) <--> StreamBox2[StreamBox] --> FileStream2[FileStream] --> FDH2(FD2) --> OSFile; +``` + +In this way, the file descriptor on the `PythonFileManager` level is the same as the file descriptor used by `FileStream`. + +Unfortunately, on .NET, somehow, the two independent read/write positions. This is not how duplicated file descriptors should work: both descriptors should point to the same file description structure and share the read/seek/write position. In practice, on .NET, writing through the second file object will overwrite data already written through the first file object. In regular Unix applications (incl. CPython), the subsequent writes append data, regardless which file object is used. The same principle should apply to reads. + +Also unfortunately, on Mono, the `FileStream` constructor accepts only descriptors opened by another call to a `FileStream` constructor[[1]]. So descriptors obtained from direct OS calls, like `open`, `creat`, `dup`, `dup2` are being rejected. + +### Mono Workaround + +To use system-opened file descriptors on Mono `UnixStream` can be used instead of `FileStream`. + +```mermaid +graph LR; + +FD1(FD1) <--> StreamBox --> FileStream --> FDH1(FD1) --> OSFile[OS File]; +FD2(FD2) <--> StreamBox2[StreamBox] --> UnixStream --> FDH2(FD2) --> OSFile; +``` + +Since FileIO works with various types of the underlying `Stream`, using `UnixStream` should be OK. + +Although `UnixStream` is available in .NET through package `Mono.Posix`, this solution still does not work around desynchronized read/write position, which `FileStream` using the original FD1 must somehow maintain independently. + +### .NET Workaround + +To ensure proper R/W behavior on .NET, operations on both file descriptions have to go though the same `FileStream` object. Since the duplicated file descriptor is basically just a number, pointing to the same file description as the original descriptor, on the OS level it doesn't matter which descriptor is used for operations. The only difference between those descriptors is flag `O_CLOEXEC`, which determines whether the descriptor stays open or not when child processed are executed. + +```mermaid +graph LR; + +FD1(FD1) <--> StreamBox --> FileStream --> FDH1(FD1) --> OSFile[OS File]; +FD2(FD2) <--> StreamBox2[StreamBox] --> FileStream; +FDH2(FD2) --> OSFile; +``` + +This actually works OK, until `dup2` is used. When the FD1 descriptor (or the associated `FileIO`) is closed on the Python API level, the underlying OS descriptor is not released but still being used by `FileStream`. A small side effect is that it will not be reused until FD2 is closed, but other than that, the behaviour is as expected. + +```mermaid +graph LR; + +FileStream --> FDH1(FD1) --> OSFile[OS File]; +FD2(FD2) <--> StreamBox2[StreamBox] --> FileStream; +FDH2(FD2) --> OSFile; +``` + +The problem arises when `dup2` is used with the target being FD1. This will forcibly close the descriptor used by `FileStream`, rendering the stream broken, despite having FD2 available. Perhaps closing `FileStream` using FD1 and opening a replacement around FD2 could be a solution, but this would have to be done atomically. If so, this would lead to a healthy structure. + +```mermaid +graph LR; + +FileStream --> FDH2(FD2); +FD2(FD2) <--> StreamBox2[StreamBox] --> FileStream; +FDH2(FD2) --> OSFile; +``` + + +## Practical Scenarios + +None of the above solutions is fully satisfactory for .NET. Ideally, .NET would behave consistently with Posix, because even the most elaborate workarounds (like juggling various `FileStream` objects around the descriptors) only work within IronPython, and break down when a descriptor is passed to a 3rd party library that uses C extension and creates its own `FILE*` struct around it. The `FileStream` object in .NET knows nothing about it and will not adjust its R/W position. + +In the meantime, let's look at some practical cases when `dup`/`dup2` are used and try to support just these. For what I have seen, `dup`/`dup2` are commonly used to redirect some of the standard descriptors. For example, to redirect standard output to a file: +1. Open a file for writing, it will get assigned descriptor FD1. +2. Copy descriptor 1 aside using `dup`. The copy will get assigned descriptor FD2. +3. Copy the open file descriptor FD1 onto descriptor 1 using `dup2`. This will forcibly close the existing descriptor 1, but not the output stream, which is sill accessible through descriptor FD2. +4. Code writing to "standard output", i.e. descriptor 1, will now write to the open file. +5. If needed, the application can still write to the original output stream by writing to descriptor FD2. +6. When done, close descriptor FD1. +7. Copy descriptor FD2 onto descriptor 1 using `dup2`. Since the is the last one pointing to the open file, the file will be closed as well. +8. Close descriptor FD2, the copy is not needed anymore. + +The same scenario is commonly done for standard input and sometimes standard error. + +The problem of .NET manifests itself when there are two descriptors open that refer to the same open file description and used concurrently. In the above scenario it is descriptor 1 and FD1. Assuming that the application is not using FD1 (typical use), the _Optimal Mechanism_ described above is sufficient. + +If the application does insist on using both descriptors 1 and FD1, the first .NET workaround is needed. This will lead to the following structure: + +```mermaid +graph LR; + +FD1(FD1) <--> StreamBox --> FileStream --> FDH1(FD1) --> OSFile[OS File]; +D1(1) <--> StreamBox2[StreamBox] --> FileStream; +DH1(1) --> OSFile; +FD2(FD2) <--> StreamBox3[StreamBox] --> FileStream2[FileStream] --> FDH2(FD2) --> stdout +``` + +The problem of closing FD1 and then overwriting it is not an issue, since only standard descriptors (0, 1, 2) are being overwritten with `dup2`. There is still a problem of overwriting data written by C extension code writing though descriptor 1. Perhaps replacing `FileStream` utilizing FD1 with `UnixStream` from Mono would make it more cooperative. + +In the end, the implementation of genuine file descriptors in IronPython starts with the simple solution (the simple workarounds described above) and will be adjusted as needed to support the 3rd party Python packages. + +## Special Case: Double Stream + +In Python, a file can be opened with mode "ab+". The file is opened for appending to the end (created if not exists), and the `+` means that it is also opened for updating. i.e. reading and writing. The file pointer is initially set at the end of the file (ready to write to append) but can be moved around to read already existing data. However, each write will append data to the end and reset the read/write pointer at the end again. In IronPython this is simulated by using two file streams, one for reading and one fore writing. Both are maintained in a single `StreamBox` but will have different file descriptors. This is subject to change. + +[1]: https://github.com/mono/mono/issues/12783 From cc922355fd4a4428384efa60369a7293ccbc49f7 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Mon, 9 Dec 2024 21:28:27 -0800 Subject: [PATCH 4/5] Update after review --- Documentation/file-descriptors.md | 4 ++-- Src/IronPython.Modules/nt.cs | 14 +++++++------- Src/IronPython/Runtime/PythonFileManager.cs | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/file-descriptors.md b/Documentation/file-descriptors.md index 56d7a1d0a..da884bb75 100755 --- a/Documentation/file-descriptors.md +++ b/Documentation/file-descriptors.md @@ -15,7 +15,7 @@ Conceptually, the relationship between `FD` (a number) and `StreamBox` (a class) It is possible to have the structure above without `FileIO`; for instance when an OS file is opened with one of the low-level functions in `os`, or when an existing FD is duplicated. It is also possible to associate an FD with several `FileIO`. In such cases it is the responsibility of the user code to take care that the FD is closed at the right time. -When FD is duplicated (using `dup` or `dup2`), the associated `StreamBox` is duplicated (there is always a 1-to1 relationship between FD and `StreamBox`), but the underlying `FileStream` objects remain the same, and so are the underlying OS handles. The new FD may be used to create a `FileIO` (or several, just as the original FD). All read/seek/write operations on both descriptors go though the same `FileStream` object and the same OS handle. +When FD is duplicated (using `dup` or `dup2`), the associated `StreamBox` is duplicated (there is always a 1-to-1 relationship between FD and `StreamBox`), but the underlying `FileStream` objects remain the same, and so are the underlying OS handles. The new FD may be used to create a `FileIO` (or several, just as the original FD). All read/seek/write operations on both descriptors go though the same `FileStream` object and the same OS handle. ```mermaid graph LR; @@ -52,7 +52,7 @@ FD2(FD2) <--> StreamBox2[StreamBox] --> FileStream2[FileStream] --> FDH2(FD2) -- In this way, the file descriptor on the `PythonFileManager` level is the same as the file descriptor used by `FileStream`. -Unfortunately, on .NET, somehow, the two independent read/write positions. This is not how duplicated file descriptors should work: both descriptors should point to the same file description structure and share the read/seek/write position. In practice, on .NET, writing through the second file object will overwrite data already written through the first file object. In regular Unix applications (incl. CPython), the subsequent writes append data, regardless which file object is used. The same principle should apply to reads. +Unfortunately, on .NET, somehow, two `FileStream` instances using the same file descriptor will have the two independent read/write positions. This is not how duplicated file descriptors should work: both descriptors should point to the same file description structure and share the read/seek/write position. In practice, on .NET, writing through the second file object will overwrite data already written through the first file object. In regular Unix applications (incl. CPython), the subsequent writes append data, regardless which file object is used. The same principle should apply to reads. Also unfortunately, on Mono, the `FileStream` constructor accepts only descriptors opened by another call to a `FileStream` constructor[[1]]. So descriptors obtained from direct OS calls, like `open`, `creat`, `dup`, `dup2` are being rejected. diff --git a/Src/IronPython.Modules/nt.cs b/Src/IronPython.Modules/nt.cs index ad13a7adf..8d91d5438 100644 --- a/Src/IronPython.Modules/nt.cs +++ b/Src/IronPython.Modules/nt.cs @@ -351,11 +351,7 @@ public static int dup(CodeContext/*!*/ context, int fd) { PythonFileManager fileManager = context.LanguageContext.FileManager; StreamBox streams = fileManager.GetStreams(fd); // OSError if fd not valid - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { - fileManager.EnsureRefStreams(streams); - fileManager.AddRefStreams(streams); - return fileManager.Add(new(streams)); - } else { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) { if (!streams.IsSingleStream && fd is 1 or 2) { // If there is a separate write stream, dupping over stout or sderr uses write stream's file descriptor fd = streams.WriteStream is FileStream fs ? fs.SafeFileHandle.DangerousGetHandle().ToInt32() : fd; @@ -369,6 +365,10 @@ public static int dup(CodeContext/*!*/ context, int fd) { fileManager.AddRefStreams(streams); return fileManager.Add(fd2, new(streams)); } + } else { + fileManager.EnsureRefStreams(streams); + fileManager.AddRefStreams(streams); + return fileManager.Add(new(streams)); } } @@ -389,7 +389,7 @@ public static int dup2(CodeContext/*!*/ context, int fd, int fd2) { close(context, fd2); } - // TODO: race condition: `open` or `dup` on another thread may occupy fd2 (simulated desctiptors only) + // TODO: race condition: `open` or `dup` on another thread may occupy fd2 (simulated descriptors only) if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { fileManager.EnsureRefStreams(streams); @@ -418,7 +418,7 @@ private static int UnixDup(int fd, int fd2, out Stream? stream) { int res = fd2 < 0 ? Mono.Unix.Native.Syscall.dup(fd) : Mono.Unix.Native.Syscall.dup2(fd, fd2); if (res < 0) throw GetLastUnixError(); if (ClrModule.IsMono) { - // This does not work on .NET, probably because .NET FileStream is not aware of Mono.Unix.UnxiStream + // This does not work on .NET, probably because .NET FileStream is not aware of Mono.Unix.UnixStream stream = new Mono.Unix.UnixStream(res, ownsHandle: true); } else { // This does not work 100% correctly on .NET, probably because each FileStream has its own read/write cursor diff --git a/Src/IronPython/Runtime/PythonFileManager.cs b/Src/IronPython/Runtime/PythonFileManager.cs index 8f8bcd0e3..64a81bb9b 100644 --- a/Src/IronPython/Runtime/PythonFileManager.cs +++ b/Src/IronPython/Runtime/PythonFileManager.cs @@ -234,8 +234,8 @@ public int Add(int id, StreamBox streams) { } // This version of Add is used for emulated file descriptors. - // Must not be used on Unix. - [SupportedOSPlatform("windows")] + // Must not be used on POSIX. + [UnsupportedOSPlatform("linux"), UnsupportedOSPlatform("macos")] public int Add(StreamBox streams) { ContractUtils.RequiresNotNull(streams, nameof(streams)); ContractUtils.Requires(streams.Id < 0, nameof(streams)); From 380ab36b69e4920f9640fad0a02fe713d08adf39 Mon Sep 17 00:00:00 2001 From: Pavel Koneski Date: Mon, 9 Dec 2024 22:01:21 -0800 Subject: [PATCH 5/5] Add wbplus file test --- Tests/test_file.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Tests/test_file.py b/Tests/test_file.py index 2604495e8..97cc97e36 100644 --- a/Tests/test_file.py +++ b/Tests/test_file.py @@ -783,6 +783,23 @@ def test_opener_uncallable(self): self.assertRaises(TypeError, open, "", "r", opener=uncallable_opener) + def test_open_wbplus(self): + with open(self.temp_file, "wb+") as f: + f.write(b"abc") + f.seek(0) + self.assertEqual(f.read(2), b"ab") + f.write(b"def") + self.assertEqual(f.read(2), b"") + f.seek(0) + self.assertEqual(f.read(6), b"abdef") + f.seek(0) + self.assertEqual(f.read(2), b"ab") + f.fileno() # does not move the file pointer + self.assertEqual(f.read(2), b"de") + f.write(b"z") + f.seek(0) + self.assertEqual(f.read(), b"abdez") + def test_open_abplus(self): with open(self.temp_file, "ab+") as f: f.write(b"abc")